136 files changed, 1433 insertions, 490 deletions
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 91dad63e5a2d..2756dcd5de6e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -365,3 +365,4 @@ struct file_system_type v9fs_fs_type = {
        .owner = THIS_MODULE,
        .fs_flags = FS_RENAME_DOES_D_MOVE,
 };
+MODULE_ALIAS_FS("9p");
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d57122935793..0ff4bae2c2a2 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -524,6 +524,7 @@ static struct file_system_type adfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("adfs");
 static int __init init_adfs_fs(void)
 {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index b84dc7352502..45161a832bbc 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -622,6 +622,7 @@ static struct file_system_type affs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("affs");
 static int __init init_affs_fs(void)
 {
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7c31ec399575..c4861557e385 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -45,6 +45,7 @@ struct file_system_type afs_fs_type = {
        .kill_sb        = afs_kill_super,
        .fs_flags       = 0,
 };
+MODULE_ALIAS_FS("afs");
 static const struct super_operations afs_super_ops = {
        .statfs         = afs_statfs,
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index cddc74b9cdb2..b3db517e89ec 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -26,6 +26,7 @@ static struct file_system_type autofs_fs_type = {
        .mount          = autofs_mount,
        .kill_sb        = autofs4_kill_sb,
 };
+MODULE_ALIAS_FS("autofs");
 static int __init init_autofs4_fs(void)
 {
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index c8f4e25eb9e2..8615ee89ab55 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -951,6 +951,7 @@ static struct file_system_type befs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,      
 };
+MODULE_ALIAS_FS("befs");
 static int __init
 init_befs_fs(void)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 737aaa3f7090..5e376bb93419 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -473,6 +473,7 @@ static struct file_system_type bfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("bfs");
 static int __init init_bfs_fs(void)
 {
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index fecbbf3f8ff2..751df5e4f61a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -720,6 +720,7 @@ static struct file_system_type bm_fs_type = {
        .mount          = bm_mount,
        .kill_sb        = kill_litter_super,
 };
+MODULE_ALIAS_FS("binfmt_misc");
 static int __init init_misc_binfmt(void)
 {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ecd25a1b4e51..ca9d8f1a3bb6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        if (tree_mod_dont_log(fs_info, NULL))
                return 0;
+        __tree_mod_log_free_eb(fs_info, old_root);
        ret = tree_mod_alloc(fs_info, flags, &tm);
        if (ret < 0)
                goto out;
@@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 static noinline void
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
                     struct extent_buffer *src, unsigned long dst_offset,
-                     unsigned long src_offset, int nr_items)
+                     unsigned long src_offset, int nr_items, int log_removal)
 {
        int ret;
        int i;
@@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
        }
        for (i = 0; i < nr_items; i++) {
-                ret = tree_mod_log_insert_key_locked(fs_info, src,
+                if (log_removal) {
-                                                     i + src_offset,
+                        ret = tree_mod_log_insert_key_locked(fs_info, src,
-                                                     MOD_LOG_KEY_REMOVE);
+                                                        i + src_offset,
-                BUG_ON(ret < 0);
+                                                        MOD_LOG_KEY_REMOVE);
+                        BUG_ON(ret < 0);
+                }
                ret = tree_mod_log_insert_key_locked(fs_info, dst,
                                                     i + dst_offset,
                                                     MOD_LOG_KEY_ADD);
@@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-                tree_mod_log_free_eb(root->fs_info, buf);
                clean_tree_block(trans, root, buf);
                *last_ref = 1;
        }
@@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
+                tree_mod_log_free_eb(root->fs_info, buf);
                btrfs_free_tree_block(trans, root, buf, parent_start,
                                      last_ref);
        }
@@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        goto enospc;
                }
-                tree_mod_log_free_eb(root->fs_info, root->node);
                tree_mod_log_set_root_pointer(root, child);
                rcu_assign_pointer(root->node, child);
@@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
                push_items = min(src_nritems - 8, push_items);
        tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
-                             push_items);
+                             push_items, 1);
        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(dst_nritems),
                           btrfs_node_key_ptr_offset(0),
@@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                                      sizeof(struct btrfs_key_ptr));
        tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
-                             src_nritems - push_items, push_items);
+                             src_nritems - push_items, push_items, 1);
        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        int mid;
        int ret;
        u32 c_nritems;
+        int tree_mod_log_removal = 1;
        c = path->nodes[level];
        WARN_ON(btrfs_header_generation(c) != trans->transid);
        if (c == root->node) {
                /* trying to split the root, lets make a new one */
                ret = insert_new_root(trans, root, path, level + 1);
+                /*
+                 * removal of root nodes has been logged by
+                 * tree_mod_log_set_root_pointer due to locking
+                 */
+                tree_mod_log_removal = 0;
                if (ret)
                        return ret;
        } else {
@@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
                            (unsigned long)btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
-        tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+        tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid,
+                             tree_mod_log_removal);
        copy_extent_buffer(split, c,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0b278b117cbe..14fce27b4780 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -22,8 +22,9 @@
 #include "disk-io.h"
 #include "transaction.h"
-#define BTRFS_DELAYED_WRITEBACK         400
+#define BTRFS_DELAYED_WRITEBACK         512
-#define BTRFS_DELAYED_BACKGROUND        100
+#define BTRFS_DELAYED_BACKGROUND        128
+#define BTRFS_DELAYED_BATCH             16
 static struct kmem_cache *delayed_node_cache;
@@ -494,6 +495,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
                                        BTRFS_DELAYED_DELETION_ITEM);
 }
+static void finish_one_item(struct btrfs_delayed_root *delayed_root)
+{
+        int seq = atomic_inc_return(&delayed_root->items_seq);
+        if ((atomic_dec_return(&delayed_root->items) <
+            BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
+            waitqueue_active(&delayed_root->wait))
+                wake_up(&delayed_root->wait);
+}
 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 {
        struct rb_root *root;
@@ -512,10 +522,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
        rb_erase(&delayed_item->rb_node, root);
        delayed_item->delayed_node->count--;
-        if (atomic_dec_return(&delayed_root->items) <
-            BTRFS_DELAYED_BACKGROUND &&
+        finish_one_item(delayed_root);
-            waitqueue_active(&delayed_root->wait))
-                wake_up(&delayed_root->wait);
 }
 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
@@ -1056,10 +1064,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
                delayed_node->count--;
                delayed_root = delayed_node->root->fs_info->delayed_root;
-                if (atomic_dec_return(&delayed_root->items) <
+                finish_one_item(delayed_root);
-                    BTRFS_DELAYED_BACKGROUND &&
-                    waitqueue_active(&delayed_root->wait))
-                        wake_up(&delayed_root->wait);
        }
 }
@@ -1304,35 +1309,44 @@ void btrfs_remove_delayed_node(struct inode *inode)
        btrfs_release_delayed_node(delayed_node);
 }
-struct btrfs_async_delayed_node {
+struct btrfs_async_delayed_work {
-        struct btrfs_root *root;
+        struct btrfs_delayed_root *delayed_root;
-        struct btrfs_delayed_node *delayed_node;
+        int nr;
        struct btrfs_work work;
 };
-static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
+static void btrfs_async_run_delayed_root(struct btrfs_work *work)
 {
-        struct btrfs_async_delayed_node *async_node;
+        struct btrfs_async_delayed_work *async_work;
+        struct btrfs_delayed_root *delayed_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_path *path;
        struct btrfs_delayed_node *delayed_node = NULL;
        struct btrfs_root *root;
        struct btrfs_block_rsv *block_rsv;
-        int need_requeue = 0;
+        int total_done = 0;
-        async_node = container_of(work, struct btrfs_async_delayed_node, work);
+        async_work = container_of(work, struct btrfs_async_delayed_work, work);
+        delayed_root = async_work->delayed_root;
        path = btrfs_alloc_path();
        if (!path)
                goto out;
-        path->leave_spinning = 1;
-        delayed_node = async_node->delayed_node;
+again:
+        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
+                goto free_path;
+        delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+        if (!delayed_node)
+                goto free_path;
+        path->leave_spinning = 1;
        root = delayed_node->root;
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
-                goto free_path;
+                goto release_path;
        block_rsv = trans->block_rsv;
        trans->block_rsv = &root->fs_info->delayed_block_rsv;
@@ -1363,57 +1377,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
         * Task1 will sleep until the transaction is commited.
         */
        mutex_lock(&delayed_node->mutex);
-        if (delayed_node->count)
+        btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
-                need_requeue = 1;
-        else
-                btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
-                                           delayed_node);
        mutex_unlock(&delayed_node->mutex);
        trans->block_rsv = block_rsv;
        btrfs_end_transaction_dmeta(trans, root);
        btrfs_btree_balance_dirty_nodelay(root);
+release_path:
+        btrfs_release_path(path);
+        total_done++;
+        btrfs_release_prepared_delayed_node(delayed_node);
+        if (async_work->nr == 0 || total_done < async_work->nr)
+                goto again;
 free_path:
        btrfs_free_path(path);
 out:
-        if (need_requeue)
+        wake_up(&delayed_root->wait);
-                btrfs_requeue_work(&async_node->work);
+        kfree(async_work);
-        else {
-                btrfs_release_prepared_delayed_node(delayed_node);
-                kfree(async_node);
-        }
 }
 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
-                                     struct btrfs_root *root, int all)
+                                     struct btrfs_root *root, int nr)
 {
-        struct btrfs_async_delayed_node *async_node;
+        struct btrfs_async_delayed_work *async_work;
-        struct btrfs_delayed_node *curr;
-        int count = 0;
-again:
+        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
-        curr = btrfs_first_prepared_delayed_node(delayed_root);
-        if (!curr)
                return 0;
-        async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
+        async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
-        if (!async_node) {
+        if (!async_work)
-                btrfs_release_prepared_delayed_node(curr);
                return -ENOMEM;
-        }
-        async_node->root = root;
-        async_node->delayed_node = curr;
-        async_node->work.func = btrfs_async_run_delayed_node_done;
-        async_node->work.flags = 0;
-        btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
+        async_work->delayed_root = delayed_root;
-        count++;
+        async_work->work.func = btrfs_async_run_delayed_root;
+        async_work->work.flags = 0;
-        if (all || count < 4)
+        async_work->nr = nr;
-                goto again;
+        btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
        return 0;
 }
@@ -1424,30 +1428,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
        WARN_ON(btrfs_first_delayed_node(delayed_root));
 }
+static int refs_newer(struct btrfs_delayed_root *delayed_root,
+                      int seq, int count)
+{
+        int val = atomic_read(&delayed_root->items_seq);
+        if (val < seq || val >= seq + count)
+                return 1;
+        return 0;
+}
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
+        int seq;
        delayed_root = btrfs_get_delayed_root(root);
        if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
                return;
+        seq = atomic_read(&delayed_root->items_seq);
        if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
                int ret;
-                ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
+                DEFINE_WAIT(__wait);
+                ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
                if (ret)
                        return;
-                wait_event_interruptible_timeout(
+                while (1) {
-                                delayed_root->wait,
+                        prepare_to_wait(&delayed_root->wait, &__wait,
-                                (atomic_read(&delayed_root->items) <
+                                        TASK_INTERRUPTIBLE);
-                                 BTRFS_DELAYED_BACKGROUND),
-                                HZ);
+                        if (refs_newer(delayed_root, seq,
-                return;
+                                       BTRFS_DELAYED_BATCH) ||
+                            atomic_read(&delayed_root->items) <
+                            BTRFS_DELAYED_BACKGROUND) {
+                                break;
+                        }
+                        if (!signal_pending(current))
+                                schedule();
+                        else
+                                break;
+                }
+                finish_wait(&delayed_root->wait, &__wait);
        }
-        btrfs_wq_run_delayed_node(delayed_root, root, 0);
+        btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
 }
 /* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 78b6ad0fc669..1d5c5f7abe3e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -43,6 +43,7 @@ struct btrfs_delayed_root {
         */
        struct list_head prepare_list;
        atomic_t items;         /* for delayed items */
+        atomic_t items_seq;     /* for delayed items */
        int nodes;              /* for delayed nodes */
        wait_queue_head_t wait;
 };
@@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root(
                                struct btrfs_delayed_root *delayed_root)
 {
        atomic_set(&delayed_root->items, 0);
+        atomic_set(&delayed_root->items_seq, 0);
        delayed_root->nodes = 0;
        spin_lock_init(&delayed_root->lock);
        init_waitqueue_head(&delayed_root->wait);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02369a3c162e..6d19a0a554aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -62,7 +62,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                                      struct btrfs_root *root);
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
 static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
 static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                                        struct extent_io_tree *dirty_pages,
@@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
                                      0, objectid, NULL, 0, 0, 0);
        if (IS_ERR(leaf)) {
                ret = PTR_ERR(leaf);
+                leaf = NULL;
                goto fail;
        }
@@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        btrfs_tree_unlock(leaf);
+        return root;
 fail:
-        if (ret)
+        if (leaf) {
-                return ERR_PTR(ret);
+                btrfs_tree_unlock(leaf);
+                free_extent_buffer(leaf);
+        }
+        kfree(root);
-        return root;
+        return ERR_PTR(ret);
 }
 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
-        if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                btrfs_free_log(NULL, root);
                btrfs_free_log_root_tree(NULL, fs_info);
        }
@@ -3687,7 +3693,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        return ret;
 }
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
 {
        struct btrfs_pending_snapshot *snapshot;
        struct list_head splice;
@@ -3700,10 +3706,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
                snapshot = list_entry(splice.next,
                                      struct btrfs_pending_snapshot,
                                      list);
+                snapshot->error = -ECANCELED;
                list_del_init(&snapshot->list);
-                kfree(snapshot);
        }
 }
@@ -3840,6 +3844,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        cur_trans->blocked = 1;
        wake_up(&root->fs_info->transaction_blocked_wait);
+        btrfs_evict_pending_snapshots(cur_trans);
        cur_trans->blocked = 0;
        wake_up(&root->fs_info->transaction_wait);
@@ -3849,8 +3855,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        btrfs_destroy_delayed_inodes(root);
        btrfs_assert_delayed_root_empty(root);
-        btrfs_destroy_pending_snapshots(cur_trans);
        btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
                                     EXTENT_DIRTY);
        btrfs_destroy_pinned_extent(root,
@@ -3894,6 +3898,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
                        wake_up(&root->fs_info->transaction_blocked_wait);
+                btrfs_evict_pending_snapshots(t);
                t->blocked = 0;
                smp_mb();
                if (waitqueue_active(&root->fs_info->transaction_wait))
@@ -3907,8 +3913,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                btrfs_destroy_delayed_inodes(root);
                btrfs_assert_delayed_root_empty(root);
-                btrfs_destroy_pending_snapshots(t);
                btrfs_destroy_delalloc_inodes(root);
                spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e074dab2d57..3d551231caba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
                cache->bytes_super += stripe_len;
                ret = add_excluded_extent(root, cache->key.objectid,
                                          stripe_len);
-                BUG_ON(ret); /* -ENOMEM */
+                if (ret)
+                        return ret;
        }
        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root,
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr,
                                       0, &logical, &nr, &stripe_len);
-                BUG_ON(ret); /* -ENOMEM */
+                if (ret)
+                        return ret;
                while (nr--) {
                        cache->bytes_super += stripe_len;
                        ret = add_excluded_extent(root, logical[nr],
                                                  stripe_len);
-                        BUG_ON(ret); /* -ENOMEM */
+                        if (ret) {
+                                kfree(logical);
+                                return ret;
+                        }
                }
                kfree(logical);
@@ -1467,8 +1472,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
        if (ret && !insert) {
                err = -ENOENT;
                goto out;
+        } else if (ret) {
+                err = -EIO;
+                WARN_ON(1);
+                goto out;
        }
-        BUG_ON(ret); /* Corruption */
        leaf = path->nodes[0];
        item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -4435,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        spin_lock(&sinfo->lock);
        spin_lock(&block_rsv->lock);
-        block_rsv->size = num_bytes;
+        block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
        num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
                    sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -4790,14 +4798,49 @@ out_fail:
         * If the inodes csum_bytes is the same as the original
         * csum_bytes then we know we haven't raced with any free()ers
         * so we can just reduce our inodes csum bytes and carry on.
-         * Otherwise we have to do the normal free thing to account for
-         * the case that the free side didn't free up its reserve
-         * because of this outstanding reservation.
         */
-        if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+        if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
                calc_csum_metadata_size(inode, num_bytes, 0);
-        else
+        } else {
-                to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+                u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
+                u64 bytes;
+                /*
+                 * This is tricky, but first we need to figure out how much we
+                 * free'd from any free-ers that occurred during this
+                 * reservation, so we reset ->csum_bytes to the csum_bytes
+                 * before we dropped our lock, and then call the free for the
+                 * number of bytes that were freed while we were trying our
+                 * reservation.
+                 */
+                bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
+                BTRFS_I(inode)->csum_bytes = csum_bytes;
+                to_free = calc_csum_metadata_size(inode, bytes, 0);
+                /*
+                 * Now we need to see how much we would have freed had we not
+                 * been making this reservation and our ->csum_bytes were not
+                 * artificially inflated.
+                 */
+                BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
+                bytes = csum_bytes - orig_csum_bytes;
+                bytes = calc_csum_metadata_size(inode, bytes, 0);
+                /*
+                 * Now reset ->csum_bytes to what it should be.  If bytes is
+                 * more than to_free then we would have free'd more space had we
+                 * not had an artificially high ->csum_bytes, so we need to free
+                 * the remainder.  If bytes is the same or less then we don't
+                 * need to do anything, the other free-ers did the correct
+                 * thing.
+                 */
+                BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
+                if (bytes > to_free)
+                        to_free = bytes - to_free;
+                else
+                        to_free = 0;
+        }
        spin_unlock(&BTRFS_I(inode)->lock);
        if (dropped)
                to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -7944,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * info has super bytes accounted for, otherwise we'll think
                 * we have more space than we actually do.
                 */
-                exclude_super_stripes(root, cache);
+                ret = exclude_super_stripes(root, cache);
+                if (ret) {
+                        /*
+                         * We may have excluded something, so call this just in
+                         * case.
+                         */
+                        free_excluded_extents(root, cache);
+                        kfree(cache->free_space_ctl);
+                        kfree(cache);
+                        goto error;
+                }
                /*
                 * check for two cases, either we are full, and therefore
@@ -8086,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
-        exclude_super_stripes(root, cache);
+        ret = exclude_super_stripes(root, cache);
+        if (ret) {
+                /*
+                 * We may have excluded something, so call this just in
+                 * case.
+                 */
+                free_excluded_extents(root, cache);
+                kfree(cache->free_space_ctl);
+                kfree(cache);
+                return ret;
+        }
        add_new_free_space(cache, root->fs_info, chunk_offset,
                           chunk_offset + size);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..cdee391fc7bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
                                GFP_NOFS);
 }
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+        unsigned long index = start >> PAGE_CACHE_SHIFT;
+        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+        struct page *page;
+        while (index <= end_index) {
+                page = find_get_page(inode->i_mapping, index);
+                BUG_ON(!page); /* Pages should be in the extent_io_tree */
+                clear_page_dirty_for_io(page);
+                page_cache_release(page);
+                index++;
+        }
+        return 0;
+}
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+        unsigned long index = start >> PAGE_CACHE_SHIFT;
+        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+        struct page *page;
+        while (index <= end_index) {
+                page = find_get_page(inode->i_mapping, index);
+                BUG_ON(!page); /* Pages should be in the extent_io_tree */
+                account_page_redirty(page);
+                __set_page_dirty_nobuffers(page);
+                page_cache_release(page);
+                index++;
+        }
+        return 0;
+}
 /*
 * helper function to set both pages and extents in the tree writeback
 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..258c92156857 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long *map_len);
 int extent_range_uptodate(struct extent_io_tree *tree,
                          u64 start, u64 end);
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
                                struct extent_io_tree *tree,
                                u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ec160202be3e..c4628a201cb3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
                csums_in_item /= csum_size;
-                if (csum_offset >= csums_in_item) {
+                if (csum_offset == csums_in_item) {
                        ret = -EFBIG;
                        goto fail;
+                } else if (csum_offset > csums_in_item) {
+                        goto fail;
                }
        }
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        sector_sum = sums->sums;
-        trans->adding_csums = 1;
 again:
        next_offset = (u64)-1;
        found_next = 0;
@@ -899,7 +900,6 @@ next_sector:
                goto again;
        }
 out:
-        trans->adding_csums = 0;
        btrfs_free_path(path);
        return ret;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index af1d0605a5c1..ade03e6f7bd2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -591,6 +591,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                }
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
                clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+                clear_bit(EXTENT_FLAG_LOGGING, &flags);
                remove_extent_mapping(em_tree, em);
                if (no_splits)
                        goto next;
@@ -2141,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 {
        struct inode *inode = file_inode(file);
        struct extent_state *cached_state = NULL;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 cur_offset;
        u64 last_byte;
        u64 alloc_start;
@@ -2168,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode,
        ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
        if (ret)
                return ret;
+        if (root->fs_info->quota_enabled) {
+                ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
+                if (ret)
+                        goto out_reserve_fail;
+        }
        /*
         * wait for ordered IO before we have any locks.  We'll loop again
@@ -2271,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                             &cached_state, GFP_NOFS);
 out:
        mutex_unlock(&inode->i_mutex);
+        if (root->fs_info->quota_enabled)
+                btrfs_qgroup_free(root, alloc_end - alloc_start);
+out_reserve_fail:
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
        return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c226daefd65d..09c58a35b429 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
        int i;
        int will_compress;
        int compress_type = root->fs_info->compress_type;
+        int redirty = 0;
        /* if this is a small write inside eof, kick off a defrag */
        if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
                if (BTRFS_I(inode)->force_compress)
                        compress_type = BTRFS_I(inode)->force_compress;
+                /*
+                 * we need to call clear_page_dirty_for_io on each
+                 * page in the range.  Otherwise applications with the file
+                 * mmap'd can wander in and change the page contents while
+                 * we are compressing them.
+                 *
+                 * If the compression fails for any reason, we set the pages
+                 * dirty again later on.
+                 */
+                extent_range_clear_dirty_for_io(inode, start, end);
+                redirty = 1;
                ret = btrfs_compress_pages(compress_type,
                                           inode->i_mapping, start,
                                           total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
                        __set_page_dirty_nobuffers(locked_page);
                        /* unlocked later on in the async handlers */
                }
+                if (redirty)
+                        extent_range_redirty_for_io(inode, start, end);
                add_async_extent(async_cow, start, end - start + 1,
                                 0, NULL, 0, BTRFS_COMPRESS_NONE);
                *num_added += 1;
@@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
        struct btrfs_ordered_sum *sum;
        list_for_each_entry(sum, list, list) {
+                trans->adding_csums = 1;
                btrfs_csum_file_blocks(trans,
                       BTRFS_I(inode)->root->fs_info->csum_root, sum);
+                trans->adding_csums = 0;
        }
        return 0;
 }
@@ -2312,6 +2328,7 @@ again:
        key.type = BTRFS_EXTENT_DATA_KEY;
        key.offset = start;
+        path->leave_spinning = 1;
        if (merge) {
                struct btrfs_file_extent_item *fi;
                u64 extent_len;
@@ -2368,6 +2385,7 @@ again:
        btrfs_mark_buffer_dirty(leaf);
        inode_add_bytes(inode, len);
+        btrfs_release_path(path);
        ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
                        new->disk_len, 0,
@@ -2381,6 +2399,7 @@ again:
        ret = 1;
 out_free_path:
        btrfs_release_path(path);
+        path->leave_spinning = 0;
        btrfs_end_transaction(trans, root);
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
@@ -3676,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
         * 1 for the dir item
         * 1 for the dir index
         * 1 for the inode ref
-         * 1 for the inode ref in the tree log
-         * 2 for the dir entries in the log
         * 1 for the inode
         */
-        trans = btrfs_start_transaction(root, 8);
+        trans = btrfs_start_transaction(root, 5);
        if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
                return trans;
@@ -8124,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items
         * should cover the worst case number of items we'll modify.
         */
-        trans = btrfs_start_transaction(root, 20);
+        trans = btrfs_start_transaction(root, 11);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                goto out_notrans;
@@ -8502,6 +8519,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
        struct btrfs_key ins;
        u64 cur_offset = start;
        u64 i_size;
+        u64 cur_bytes;
        int ret = 0;
        bool own_trans = true;
@@ -8516,8 +8534,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                        }
                }
-                ret = btrfs_reserve_extent(trans, root,
+                cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
-                                           min(num_bytes, 256ULL * 1024 * 1024),
+                cur_bytes = max(cur_bytes, min_size);
+                ret = btrfs_reserve_extent(trans, root, cur_bytes,
                                           min_size, 0, *alloc_hint, &ins, 1);
                if (ret) {
                        if (own_trans)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c83086fdda05..2c02310ff2d9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -527,6 +527,8 @@ fail:
        if (async_transid) {
                *async_transid = trans->transid;
                err = btrfs_commit_transaction_async(trans, root, 1);
+                if (err)
+                        err = btrfs_commit_transaction(trans, root);
        } else {
                err = btrfs_commit_transaction(trans, root);
        }
@@ -592,16 +594,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                *async_transid = trans->transid;
                ret = btrfs_commit_transaction_async(trans,
                                     root->fs_info->extent_root, 1);
+                if (ret)
+                        ret = btrfs_commit_transaction(trans, root);
        } else {
                ret = btrfs_commit_transaction(trans,
                                               root->fs_info->extent_root);
        }
-        if (ret) {
+        if (ret)
-                /* cleanup_transaction has freed this for us */
-                if (trans->aborted)
-                        pending_snapshot = NULL;
                goto fail;
-        }
        ret = pending_snapshot->error;
        if (ret)
@@ -2245,13 +2245,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
        if (ret)
                return ret;
-        if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
-                        1)) {
-                pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-                mnt_drop_write_file(file);
-                return -EINVAL;
-        }
        if (btrfs_root_readonly(root)) {
                ret = -EROFS;
                goto out;
@@ -2306,7 +2299,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
                ret = -EINVAL;
        }
 out:
-        atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
        mnt_drop_write_file(file);
        return ret;
 }
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index ca52681e5f40..b81e0e9a4894 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -26,7 +26,6 @@
 void btrfs_tree_lock(struct extent_buffer *eb);
 void btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_try_spin_lock(struct extent_buffer *eb);
 void btrfs_tree_read_lock(struct extent_buffer *eb);
 void btrfs_tree_read_unlock(struct extent_buffer *eb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dc08d77b717e..005c45db699e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
        INIT_LIST_HEAD(&splice);
        INIT_LIST_HEAD(&works);
+        mutex_lock(&root->fs_info->ordered_operations_mutex);
        spin_lock(&root->fs_info->ordered_extent_lock);
        list_splice_init(&root->fs_info->ordered_extents, &splice);
        while (!list_empty(&splice)) {
@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
                cond_resched();
        }
+        mutex_unlock(&root->fs_info->ordered_operations_mutex);
 }
 /*
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aee4b1cc3d98..b44124dd2370 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
                                   sgn > 0 ? node->seq - 1 : node->seq, &roots);
        if (ret < 0)
-                goto out;
+                return ret;
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
@@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        ret = 0;
 unlock:
        spin_unlock(&fs_info->qgroup_lock);
-out:
        ulist_free(roots);
        ulist_free(tmp);
@@ -1525,21 +1524,23 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
                    qg->reserved + qg->rfer + num_bytes >
-                    qg->max_rfer)
+                    qg->max_rfer) {
                        ret = -EDQUOT;
+                        goto out;
+                }
                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
                    qg->reserved + qg->excl + num_bytes >
-                    qg->max_excl)
+                    qg->max_excl) {
                        ret = -EDQUOT;
+                        goto out;
+                }
                list_for_each_entry(glist, &qg->groups, next_group) {
                        ulist_add(ulist, glist->group->qgroupid,
                                  (uintptr_t)glist->group, GFP_ATOMIC);
                }
        }
-        if (ret)
-                goto out;
        /*
         * no limits exceeded, now record the reservation into all qgroups
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 50695dc5e2ab..b67171e6d688 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1269,6 +1269,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
        }
        spin_unlock(&rc->reloc_root_tree.lock);
+        if (!node)
+                return 0;
        BUG_ON((struct btrfs_root *)node->data != root);
        if (!del) {
@@ -2238,13 +2240,28 @@ again:
 }
 static noinline_for_stack
+void free_reloc_roots(struct list_head *list)
+{
+        struct btrfs_root *reloc_root;
+        while (!list_empty(list)) {
+                reloc_root = list_entry(list->next, struct btrfs_root,
+                                        root_list);
+                __update_reloc_root(reloc_root, 1);
+                free_extent_buffer(reloc_root->node);
+                free_extent_buffer(reloc_root->commit_root);
+                kfree(reloc_root);
+        }
+}
+static noinline_for_stack
 int merge_reloc_roots(struct reloc_control *rc)
 {
        struct btrfs_root *root;
        struct btrfs_root *reloc_root;
        LIST_HEAD(reloc_roots);
        int found = 0;
-        int ret;
+        int ret = 0;
 again:
        root = rc->extent_root;
@@ -2270,20 +2287,33 @@ again:
                        BUG_ON(root->reloc_root != reloc_root);
                        ret = merge_reloc_root(rc, root);
-                        BUG_ON(ret);
+                        if (ret)
+                                goto out;
                } else {
                        list_del_init(&reloc_root->root_list);
                }
                ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
-                BUG_ON(ret < 0);
+                if (ret < 0) {
+                        if (list_empty(&reloc_root->root_list))
+                                list_add_tail(&reloc_root->root_list,
+                                              &reloc_roots);
+                        goto out;
+                }
        }
        if (found) {
                found = 0;
                goto again;
        }
+out:
+        if (ret) {
+                btrfs_std_error(root->fs_info, ret);
+                if (!list_empty(&reloc_roots))
+                        free_reloc_roots(&reloc_roots);
+        }
        BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
-        return 0;
+        return ret;
 }
 static void free_block_list(struct rb_root *blocks)
@@ -2818,8 +2848,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
        int err = 0;
        path = btrfs_alloc_path();
-        if (!path)
+        if (!path) {
-                return -ENOMEM;
+                err = -ENOMEM;
+                goto out_path;
+        }
        rb_node = rb_first(blocks);
        while (rb_node) {
@@ -2858,10 +2890,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
                rb_node = rb_next(rb_node);
        }
 out:
-        free_block_list(blocks);
        err = finish_pending_nodes(trans, rc, path, err);
        btrfs_free_path(path);
+out_path:
+        free_block_list(blocks);
        return err;
 }
@@ -3698,7 +3731,15 @@ int prepare_to_relocate(struct reloc_control *rc)
        set_reloc_control(rc);
        trans = btrfs_join_transaction(rc->extent_root);
-        BUG_ON(IS_ERR(trans));
+        if (IS_ERR(trans)) {
+                unset_reloc_control(rc);
+                /*
+                 * extent tree is not a ref_cow tree and has no reloc_root to
+                 * cleanup.  And callers are responsible to free the above
+                 * block rsv.
+                 */
+                return PTR_ERR(trans);
+        }
        btrfs_commit_transaction(trans, rc->extent_root);
        return 0;
 }
@@ -3730,7 +3771,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        while (1) {
                progress++;
                trans = btrfs_start_transaction(rc->extent_root, 0);
-                BUG_ON(IS_ERR(trans));
+                if (IS_ERR(trans)) {
+                        err = PTR_ERR(trans);
+                        trans = NULL;
+                        break;
+                }
 restart:
                if (update_backref_cache(trans, &rc->backref_cache)) {
                        btrfs_end_transaction(trans, rc->extent_root);
@@ -4264,14 +4309,9 @@ int btrfs_recover_relocation(struct btrfs_root *root)
 out_free:
        kfree(rc);
 out:
-        while (!list_empty(&reloc_roots)) {
+        if (!list_empty(&reloc_roots))
-                reloc_root = list_entry(reloc_roots.next,
+                free_reloc_roots(&reloc_roots);
-                                        struct btrfs_root, root_list);
-                list_del(&reloc_root->root_list);
-                free_extent_buffer(reloc_root->node);
-                free_extent_buffer(reloc_root->commit_root);
-                kfree(reloc_root);
-        }
        btrfs_free_path(path);
        if (err == 0) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 53c3501fa4ca..85e072b956d5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
        eb = path->nodes[0];
        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
        item_size = btrfs_item_size_nr(eb, path->slots[0]);
-        btrfs_release_path(path);
        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                do {
@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
                                ret < 0 ? -1 : ref_level,
                                ret < 0 ? -1 : ref_root);
                } while (ret != 1);
+                btrfs_release_path(path);
        } else {
+                btrfs_release_path(path);
                swarn.path = path;
                swarn.dev = dev;
                iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7a8b861058b..c85e7c6b4598 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
                    found_key.type != key.type) {
                        key.offset += right_len;
                        break;
-                } else {
+                }
-                        if (found_key.offset != key.offset + right_len) {
+                if (found_key.offset != key.offset + right_len) {
-                                /* Should really not happen */
+                        ret = 0;
-                                ret = -EIO;
+                        goto out;
-                                goto out;
-                        }
                }
                key = found_key;
        }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 68a29a1ea068..f6b88595f858 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1558,6 +1558,7 @@ static struct file_system_type btrfs_fs_type = {
        .kill_sb        = btrfs_kill_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("btrfs");
 /*
 * used by btrfsctl to scan devices when no FS is mounted
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e52da6fb1165..50767bbaad6c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -625,14 +625,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;
-        /*
-         * the same root has to be passed to start_transaction and
-         * end_transaction. Subvolume quota depends on this.
-         */
-        WARN_ON(trans->root != root);
        if (trans->qgroup_reserved) {
-                btrfs_qgroup_free(root, trans->qgroup_reserved);
+                /*
+                 * the same root has to be passed here between start_transaction
+                 * and end_transaction. Subvolume quota depends on this.
+                 */
+                btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
                trans->qgroup_reserved = 0;
        }
@@ -1052,7 +1051,12 @@ int btrfs_defrag_root(struct btrfs_root *root)
 /*
 * new snapshots need to be created at a very specific time in the
- * transaction commit.  This does the actual creation
+ * transaction commit.  This does the actual creation.
+ *
+ * Note:
+ * If an error occurs that may affect the commit of the current transaction,
+ * we should return the error number. If the error only affects the creation
+ * of the pending snapshot, just return 0.
 */
 static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                   struct btrfs_fs_info *fs_info,
@@ -1071,7 +1075,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        struct extent_buffer *tmp;
        struct extent_buffer *old;
        struct timespec cur_time = CURRENT_TIME;
-        int ret;
+        int ret = 0;
        u64 to_reserve = 0;
        u64 index = 0;
        u64 objectid;
@@ -1080,40 +1084,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path) {
-                ret = pending->error = -ENOMEM;
+                pending->error = -ENOMEM;
-                return ret;
+                return 0;
        }
        new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
        if (!new_root_item) {
-                ret = pending->error = -ENOMEM;
+                pending->error = -ENOMEM;
                goto root_item_alloc_fail;
        }
-        ret = btrfs_find_free_objectid(tree_root, &objectid);
+        pending->error = btrfs_find_free_objectid(tree_root, &objectid);
-        if (ret) {
+        if (pending->error)
-                pending->error = ret;
                goto no_free_objectid;
-        }
        btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
        if (to_reserve > 0) {
-                ret = btrfs_block_rsv_add(root, &pending->block_rsv,
+                pending->error = btrfs_block_rsv_add(root,
-                                          to_reserve,
+                                                     &pending->block_rsv,
-                                          BTRFS_RESERVE_NO_FLUSH);
+                                                     to_reserve,
-                if (ret) {
+                                                     BTRFS_RESERVE_NO_FLUSH);
-                        pending->error = ret;
+                if (pending->error)
                        goto no_free_objectid;
-                }
        }
-        ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
+        pending->error = btrfs_qgroup_inherit(trans, fs_info,
-                                   objectid, pending->inherit);
+                                              root->root_key.objectid,
-        if (ret) {
+                                              objectid, pending->inherit);
-                pending->error = ret;
+        if (pending->error)
                goto no_free_objectid;
-        }
        key.objectid = objectid;
        key.offset = (u64)-1;
@@ -1141,7 +1141,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                         dentry->d_name.len, 0);
        if (dir_item != NULL && !IS_ERR(dir_item)) {
                pending->error = -EEXIST;
-                goto fail;
+                goto dir_item_existed;
        } else if (IS_ERR(dir_item)) {
                ret = PTR_ERR(dir_item);
                btrfs_abort_transaction(trans, root, ret);
@@ -1272,6 +1272,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
 fail:
+        pending->error = ret;
+dir_item_existed:
        trans->block_rsv = rsv;
        trans->bytes_reserved = 0;
 no_free_objectid:
@@ -1287,12 +1289,17 @@ root_item_alloc_fail:
 static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
                                             struct btrfs_fs_info *fs_info)
 {
-        struct btrfs_pending_snapshot *pending;
+        struct btrfs_pending_snapshot *pending, *next;
        struct list_head *head = &trans->transaction->pending_snapshots;
+        int ret = 0;
-        list_for_each_entry(pending, head, list)
+        list_for_each_entry_safe(pending, next, head, list) {
-                create_pending_snapshot(trans, fs_info, pending);
+                list_del(&pending->list);
-        return 0;
+                ret = create_pending_snapshot(trans, fs_info, pending);
+                if (ret)
+                        break;
+        }
+        return ret;
 }
 static void update_super_roots(struct btrfs_root *root)
@@ -1448,6 +1455,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
        btrfs_abort_transaction(trans, root, err);
        spin_lock(&root->fs_info->trans_lock);
+        if (list_empty(&cur_trans->list)) {
+                spin_unlock(&root->fs_info->trans_lock);
+                btrfs_end_transaction(trans, root);
+                return;
+        }
        list_del_init(&cur_trans->list);
        if (cur_trans == root->fs_info->running_transaction) {
                root->fs_info->trans_no_join = 1;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c7ef569eb22a..451fad96ecd1 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1382,7 +1382,10 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
        btrfs_release_path(path);
        if (ret == 0) {
-                btrfs_inc_nlink(inode);
+                if (!inode->i_nlink)
+                        set_nlink(inode, 1);
+                else
+                        btrfs_inc_nlink(inode);
                ret = btrfs_update_inode(trans, root, inode);
        } else if (ret == -EEXIST) {
                ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 35bb2d4ed29f..2854c824ab64 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -684,6 +684,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                __btrfs_close_devices(fs_devices);
                free_fs_devices(fs_devices);
        }
+        /*
+         * Wait for rcu kworkers under __btrfs_close_devices
+         * to finish all blkdev_puts so device is really
+         * free when umount is done.
+         */
+        rcu_barrier();
        return ret;
 }
@@ -2379,7 +2385,11 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
                return ret;
        trans = btrfs_start_transaction(root, 0);
-        BUG_ON(IS_ERR(trans));
+        if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                btrfs_std_error(root->fs_info, ret);
+                return ret;
+        }
        lock_chunks(root);
@@ -3050,7 +3060,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        unset_balance_control(fs_info);
        ret = del_balance_item(fs_info->tree_root);
-        BUG_ON(ret);
+        if (ret)
+                btrfs_std_error(fs_info, ret);
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
@@ -3230,6 +3241,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                update_ioctl_balance_args(fs_info, 0, bargs);
        }
+        if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
+            balance_need_close(fs_info)) {
+                __cancel_balance(fs_info);
+        }
        wake_up(&fs_info->balance_wait_q);
        return ret;
@@ -4919,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        em = lookup_extent_mapping(em_tree, chunk_start, 1);
        read_unlock(&em_tree->lock);
-        BUG_ON(!em || em->start != chunk_start);
+        if (!em) {
+                printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
+                       chunk_start);
+                return -EIO;
+        }
+        if (em->start != chunk_start) {
+                printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
+                       em->start, chunk_start);
+                free_extent_map(em);
+                return -EIO;
+        }
        map = (struct map_lookup *)em->bdev;
        length = em->len;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9fe17c6c2876..6ddc0bca56b2 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -952,6 +952,7 @@ static struct file_system_type ceph_fs_type = {
        .kill_sb        = ceph_kill_sb,
        .fs_flags       = FS_RENAME_DOES_D_MOVE,
 };
+MODULE_ALIAS_FS("ceph");
 #define _STRINGIFY(x) #x
 #define STRINGIFY(x) _STRINGIFY(x)
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index cfd1ce34e0bc..1d36db114772 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length,
                }
        }
-        /* mechlistMIC */
+        /*
-        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
+         * We currently ignore anything at the end of the SPNEGO blob after
-                /* Check if we have reached the end of the blob, but with
+         * the mechTypes have been parsed, since none of that info is
-                   no mechListMic (e.g. NTLMSSP instead of KRB5) */
+         * used at the moment.
-                if (ctx.error == ASN1_ERR_DEC_EMPTY)
+         */
-                        goto decode_negtoken_exit;
-                cFYI(1, "Error decoding last part negTokenInit exit3");
-                return 0;
-        } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-                /* tag = 3 indicating mechListMIC */
-                cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
-                        cls, con, tag, end, *end);
-                return 0;
-        }
-        /* sequence */
-        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                cFYI(1, "Error decoding last part negTokenInit exit5");
-                return 0;
-        } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
-                   || (tag != ASN1_SEQ)) {
-                cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)",
-                        cls, con, tag, end, *end);
-        }
-        /* sequence of */
-        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                cFYI(1, "Error decoding last part negTokenInit exit 7");
-                return 0;
-        } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
-                cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
-                        cls, con, tag, end, *end);
-                return 0;
-        }
-        /* general string */
-        if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
-                cFYI(1, "Error decoding last part negTokenInit exit9");
-                return 0;
-        } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
-                   || (tag != ASN1_GENSTR)) {
-                cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)",
-                        cls, con, tag, end, *end);
-                return 0;
-        }
-        cFYI(1, "Need to call asn1_octets_decode() function for %s",
-                ctx.pointer);   /* is this UTF-8 or ASCII? */
-decode_negtoken_exit:
        return 1;
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1a052c0eee8e..345fc89c4286 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -91,6 +91,30 @@ struct workqueue_struct	*cifsiod_wq;
 __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
 #endif
+/*
+ * Bumps refcount for cifs super block.
+ * Note that it should be only called if a reference to VFS super block is
+ * already held, e.g. in open-type syscalls context. Otherwise it can race with
+ * atomic_dec_and_test in deactivate_locked_super.
+ */
+void
+cifs_sb_active(struct super_block *sb)
+{
+        struct cifs_sb_info *server = CIFS_SB(sb);
+        if (atomic_inc_return(&server->active) == 1)
+                atomic_inc(&sb->s_active);
+}
+void
+cifs_sb_deactive(struct super_block *sb)
+{
+        struct cifs_sb_info *server = CIFS_SB(sb);
+        if (atomic_dec_and_test(&server->active))
+                deactivate_super(sb);
+}
 static int
 cifs_read_super(struct super_block *sb)
 {
@@ -777,6 +801,7 @@ struct file_system_type cifs_fs_type = {
        .kill_sb = cifs_kill_sb,
        /*  .fs_flags */
 };
+MODULE_ALIAS_FS("cifs");
 const struct inode_operations cifs_dir_inode_ops = {
        .create = cifs_create,
        .atomic_open = cifs_atomic_open,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 7163419cecd9..0e32c3446ce9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -41,6 +41,10 @@ extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;
 extern const struct address_space_operations cifs_addr_ops_smallbuf;
+/* Functions related to super block operations */
+extern void cifs_sb_active(struct super_block *sb);
+extern void cifs_sb_deactive(struct super_block *sb);
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 7353bc5d73d7..8e2e799e7a24 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1909,12 +1909,12 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
        } while (rc == -EAGAIN);
        for (i = 0; i < wdata->nr_pages; i++) {
+                unlock_page(wdata->pages[i]);
                if (rc != 0) {
                        SetPageError(wdata->pages[i]);
                        end_page_writeback(wdata->pages[i]);
                        page_cache_release(wdata->pages[i]);
                }
-                unlock_page(wdata->pages[i]);
        }
        mapping_set_error(inode->i_mapping, rc);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 54125e04fd0c..991c63c6bdd0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -97,7 +97,7 @@ enum {
        Opt_user, Opt_pass, Opt_ip,
        Opt_unc, Opt_domain,
        Opt_srcaddr, Opt_prefixpath,
-        Opt_iocharset, Opt_sockopt,
+        Opt_iocharset,
        Opt_netbiosname, Opt_servern,
        Opt_ver, Opt_vers, Opt_sec, Opt_cache,
@@ -202,7 +202,6 @@ static const match_table_t cifs_mount_option_tokens = {
        { Opt_srcaddr, "srcaddr=%s" },
        { Opt_prefixpath, "prefixpath=%s" },
        { Opt_iocharset, "iocharset=%s" },
-        { Opt_sockopt, "sockopt=%s" },
        { Opt_netbiosname, "netbiosname=%s" },
        { Opt_servern, "servern=%s" },
        { Opt_ver, "ver=%s" },
@@ -1752,19 +1751,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                         */
                        cFYI(1, "iocharset set to %s", string);
                        break;
-                case Opt_sockopt:
-                        string = match_strdup(args);
-                        if (string == NULL)
-                                goto out_nomem;
-                        if (strnicmp(string, "TCP_NODELAY", 11) == 0) {
-                                printk(KERN_WARNING "CIFS: the "
-                                        "sockopt=TCP_NODELAY option has been "
-                                        "deprecated and will be removed "
-                                        "in 3.9\n");
-                                vol->sockopt_tcp_nodelay = 1;
-                        }
-                        break;
                case Opt_netbiosname:
                        string = match_strdup(args);
                        if (string == NULL)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8c0d85577314..7a0dd99e4507 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,6 +300,8 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
+        cifs_sb_active(inode->i_sb);
        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
@@ -349,7 +351,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
-        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+        struct super_block *sb = inode->i_sb;
+        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
@@ -414,6 +417,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
+        cifs_sb_deactive(sb);
        kfree(cifs_file);
 }
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 83f2606c76d0..20887bf63121 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -995,6 +995,15 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
                return PTR_ERR(tlink);
        tcon = tlink_tcon(tlink);
+        /*
+         * We cannot rename the file if the server doesn't support
+         * CAP_INFOLEVEL_PASSTHRU
+         */
+        if (!(tcon->ses->capabilities & CAP_INFOLEVEL_PASSTHRU)) {
+                rc = -EBUSY;
+                goto out;
+        }
        rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
                         DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
                         &netfid, &oplock, NULL, cifs_sb->local_nls,
@@ -1023,7 +1032,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
                                        current->tgid);
                /* although we would like to mark the file hidden
                   if that fails we will still try to rename it */
-                if (rc != 0)
+                if (!rc)
                        cifsInode->cifsAttrs = dosattr;
                else
                        dosattr = origattr; /* since not able to change them */
@@ -1034,7 +1043,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
                                   cifs_sb->mnt_cifs_flags &
                                            CIFS_MOUNT_MAP_SPECIAL_CHR);
        if (rc != 0) {
-                rc = -ETXTBSY;
+                rc = -EBUSY;
                goto undo_setattr;
        }
@@ -1053,7 +1062,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
                if (rc == -ENOENT)
                        rc = 0;
                else if (rc != 0) {
-                        rc = -ETXTBSY;
+                        rc = -EBUSY;
                        goto undo_rename;
                }
                cifsInode->delete_pending = true;
@@ -1160,15 +1169,13 @@ psx_del_no_retry:
                        cifs_drop_nlink(inode);
        } else if (rc == -ENOENT) {
                d_drop(dentry);
-        } else if (rc == -ETXTBSY) {
+        } else if (rc == -EBUSY) {
                if (server->ops->rename_pending_delete) {
                        rc = server->ops->rename_pending_delete(full_path,
                                                                dentry, xid);
                        if (rc == 0)
                                cifs_drop_nlink(inode);
                }
-                if (rc == -ETXTBSY)
-                        rc = -EBUSY;
        } else if ((rc == -EACCES) && (dosattr == 0) && inode) {
                attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
                if (attrs == NULL) {
@@ -1509,7 +1516,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
         * source. Note that cross directory moves do not work with
         * rename by filehandle to various Windows servers.
         */
-        if (rc == 0 || rc != -ETXTBSY)
+        if (rc == 0 || rc != -EBUSY)
                goto do_rename_exit;
        /* open-file renames don't work across directories */
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index a82bc51fdc82..c0b25b28be6c 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -62,7 +62,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
        {ERRdiffdevice, -EXDEV},
        {ERRnofiles, -ENOENT},
        {ERRwriteprot, -EROFS},
-        {ERRbadshare, -ETXTBSY},
+        {ERRbadshare, -EBUSY},
        {ERRlock, -EACCES},
        {ERRunsup, -EINVAL},
        {ERRnosuchshare, -ENXIO},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c9c7aa7ed966..bceffe7b8f8d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -744,4 +744,5 @@ struct smb_version_values smb30_values = {
        .cap_unix = 0,
        .cap_nt_find = SMB2_NT_FIND,
        .cap_large_files = SMB2_LARGE_FILES,
+        .oplock_read = SMB2_OPLOCK_LEVEL_II,
 };
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index dada9d0abede..4dcc0d81a7aa 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -329,4 +329,5 @@ struct file_system_type coda_fs_type = {
        .kill_sb        = kill_anon_super,
        .fs_flags       = FS_BINARY_MOUNTDATA,
 };
+MODULE_ALIAS_FS("coda");
diff --git a/fs/compat.c b/fs/compat.c
index fe40fde29111..d487985dd0ea 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -558,6 +558,10 @@ ssize_t compat_rw_copy_check_uvector(int type,
        }
        *ret_pointer = iov;
+        ret = -EFAULT;
+        if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
+                goto out;
        /*
         * Single unix specification:
         * We should -EINVAL if an element length is not >= 0 and fitting an
@@ -1080,17 +1084,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
        if (!file->f_op)
                goto out;
-        ret = -EFAULT;
+        ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
-        if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
-                goto out;
-        tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs,
                                               UIO_FASTIOV, iovstack, &iov);
-        if (tot_len == 0) {
+        if (ret <= 0)
-                ret = 0;
                goto out;
-        }
+        tot_len = ret;
        ret = rw_verify_area(type, file, pos, tot_len);
        if (ret < 0)
                goto out;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index aee0a7ebbd8e..7f26c3cf75ae 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -114,6 +114,7 @@ static struct file_system_type configfs_fs_type = {
        .mount          = configfs_do_mount,
        .kill_sb        = kill_litter_super,
 };
+MODULE_ALIAS_FS("configfs");
 struct dentry *configfs_pin_fs(void)
 {
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 3ceb9ec976e1..35b1c7bd18b7 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -573,6 +573,7 @@ static struct file_system_type cramfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("cramfs");
 static int __init init_cramfs_fs(void)
 {
diff --git a/fs/dcache.c b/fs/dcache.c
index fbfae008ba44..e8bc3420d63e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path,
        bool slash = false;
        int error = 0;
-        br_read_lock(&vfsmount_lock);
        while (dentry != root->dentry || vfsmnt != root->mnt) {
                struct dentry * parent;
@@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path,
        if (!error && !slash)
                error = prepend(buffer, buflen, "/", 1);
-out:
-        br_read_unlock(&vfsmount_lock);
        return error;
 global_root:
@@ -2590,7 +2587,7 @@ global_root:
                error = prepend(buffer, buflen, "/", 1);
        if (!error)
                error = is_mounted(vfsmnt) ? 1 : 2;
-        goto out;
+        return error;
 }
 /**
@@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path,
        int error;
        prepend(&res, &buflen, "\0", 1);
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        error = prepend_path(path, root, &res, &buflen);
        write_sequnlock(&rename_lock);
+        br_read_unlock(&vfsmount_lock);
        if (error < 0)
                return ERR_PTR(error);
@@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path,
        int error;
        prepend(&res, &buflen, "\0", 1);
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        error = prepend_path(path, &root, &res, &buflen);
        write_sequnlock(&rename_lock);
+        br_read_unlock(&vfsmount_lock);
        if (error > 1)
                error = -EINVAL;
@@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
        get_fs_root(current->fs, &root);
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        error = path_with_deleted(path, &root, &res, &buflen);
+        write_sequnlock(&rename_lock);
+        br_read_unlock(&vfsmount_lock);
        if (error < 0)
                res = ERR_PTR(error);
-        write_sequnlock(&rename_lock);
        path_put(&root);
        return res;
 }
@@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
        get_fs_root_and_pwd(current->fs, &root, &pwd);
        error = -ENOENT;
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        if (!d_unlinked(pwd.dentry)) {
                unsigned long len;
@@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                prepend(&cwd, &buflen, "\0", 1);
                error = prepend_path(&pwd, &root, &cwd, &buflen);
                write_sequnlock(&rename_lock);
+                br_read_unlock(&vfsmount_lock);
                if (error < 0)
                        goto out;
@@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                }
        } else {
                write_sequnlock(&rename_lock);
+                br_read_unlock(&vfsmount_lock);
        }
 out:
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 0c4f80b447fb..4888cb3fdef7 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -299,6 +299,7 @@ static struct file_system_type debug_fs_type = {
        .mount =        debug_mount,
        .kill_sb =      kill_litter_super,
 };
+MODULE_ALIAS_FS("debugfs");
 static struct dentry *__create_file(const char *name, umode_t mode,
                                    struct dentry *parent, void *data,
diff --git a/fs/ecryptfs/Kconfig b/fs/ecryptfs/Kconfig
index e15ef38c24fa..434aa313f077 100644
--- a/fs/ecryptfs/Kconfig
+++ b/fs/ecryptfs/Kconfig
@@ -12,3 +12,11 @@ config ECRYPT_FS
          To compile this file system support as a module, choose M here: the
          module will be called ecryptfs.
+config ECRYPT_FS_MESSAGING
+        bool "Enable notifications for userspace key wrap/unwrap"
+        depends on ECRYPT_FS
+        help
+          Enables the /dev/ecryptfs entry for use by ecryptfsd. This allows
+          for userspace to wrap/unwrap file encryption keys by other
+          backends, like OpenSSL.
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 2cc9ee4ad2eb..49678a69947d 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -1,7 +1,10 @@
 #
-# Makefile for the Linux 2.6 eCryptfs
+# Makefile for the Linux eCryptfs
 #
 obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
-ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o
+ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \
+              crypto.o keystore.o kthread.o debug.o
+ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index a7b0c2dfb3db..d5c25db4398f 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -301,17 +301,14 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
        while (size > 0 && i < sg_size) {
                pg = virt_to_page(addr);
                offset = offset_in_page(addr);
-                if (sg)
+                sg_set_page(&sg[i], pg, 0, offset);
-                        sg_set_page(&sg[i], pg, 0, offset);
                remainder_of_page = PAGE_CACHE_SIZE - offset;
                if (size >= remainder_of_page) {
-                        if (sg)
+                        sg[i].length = remainder_of_page;
-                                sg[i].length = remainder_of_page;
                        addr += remainder_of_page;
                        size -= remainder_of_page;
                } else {
-                        if (sg)
+                        sg[i].length = size;
-                                sg[i].length = size;
                        addr += size;
                        size = 0;
                }
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 1b5d9af937df..bf12ba5dd223 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -45,14 +45,12 @@
 static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct dentry *lower_dentry;
-        struct vfsmount *lower_mnt;
        int rc = 1;
        if (flags & LOOKUP_RCU)
                return -ECHILD;
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
-        lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
        if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
                goto out;
        rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 7e2c6f5d7985..dd299b389d4e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -172,6 +172,19 @@ ecryptfs_get_key_payload_data(struct key *key)
 #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 24
 #define ECRYPTFS_ENCRYPTED_DENTRY_NAME_LEN (18 + 1 + 4 + 1 + 32)
+#ifdef CONFIG_ECRYPT_FS_MESSAGING
+# define ECRYPTFS_VERSIONING_MASK_MESSAGING (ECRYPTFS_VERSIONING_DEVMISC \
+                                             | ECRYPTFS_VERSIONING_PUBKEY)
+#else
+# define ECRYPTFS_VERSIONING_MASK_MESSAGING 0
+#endif
+#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
+                                  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
+                                  | ECRYPTFS_VERSIONING_XATTR \
+                                  | ECRYPTFS_VERSIONING_MULTKEY \
+                                  | ECRYPTFS_VERSIONING_MASK_MESSAGING \
+                                  | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION)
 struct ecryptfs_key_sig {
        struct list_head crypt_stat_list;
        char keysig[ECRYPTFS_SIG_SIZE_HEX + 1];
@@ -399,7 +412,9 @@ struct ecryptfs_daemon {
        struct hlist_node euid_chain;
 };
+#ifdef CONFIG_ECRYPT_FS_MESSAGING
 extern struct mutex ecryptfs_daemon_hash_mux;
+#endif
 static inline size_t
 ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat)
@@ -610,6 +625,7 @@ int
 ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                  size_t size, int flags);
 int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
+#ifdef CONFIG_ECRYPT_FS_MESSAGING
 int ecryptfs_process_response(struct ecryptfs_daemon *daemon,
                              struct ecryptfs_message *msg, u32 seq);
 int ecryptfs_send_message(char *data, int data_len,
@@ -618,6 +634,24 @@ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
                               struct ecryptfs_message **emsg);
 int ecryptfs_init_messaging(void);
 void ecryptfs_release_messaging(void);
+#else
+static inline int ecryptfs_init_messaging(void)
+{
+        return 0;
+}
+static inline void ecryptfs_release_messaging(void)
+{ }
+static inline int ecryptfs_send_message(char *data, int data_len,
+                                        struct ecryptfs_msg_ctx **msg_ctx)
+{
+        return -ENOTCONN;
+}
+static inline int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
+                                             struct ecryptfs_message **emsg)
+{
+        return -ENOMSG;
+}
+#endif
 void
 ecryptfs_write_header_metadata(char *virt,
@@ -655,12 +689,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
                                     size_t offset_in_page, size_t size,
                                     struct inode *ecryptfs_inode);
 struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index);
-int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
-int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon);
 int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
                                 size_t *length_size);
 int ecryptfs_write_packet_length(char *dest, size_t size,
                                 size_t *packet_size_length);
+#ifdef CONFIG_ECRYPT_FS_MESSAGING
 int ecryptfs_init_ecryptfs_miscdev(void);
 void ecryptfs_destroy_ecryptfs_miscdev(void);
 int ecryptfs_send_miscdev(char *data, size_t data_size,
@@ -669,6 +702,9 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
 void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
 int
 ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file);
+int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
+int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon);
+#endif
 int ecryptfs_init_kthread(void);
 void ecryptfs_destroy_kthread(void);
 int ecryptfs_privileged_open(struct file **lower_file,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 53acc9d0c138..63b1f54b6a1f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,7 +199,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
        struct dentry *ecryptfs_dentry = file->f_path.dentry;
        /* Private value of ecryptfs_dentry allocated in
         * ecryptfs_lookup() */
-        struct dentry *lower_dentry;
        struct ecryptfs_file_info *file_info;
        mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -222,7 +221,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
                rc = -ENOMEM;
                goto out;
        }
-        lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
        crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
        mutex_lock(&crypt_stat->cs_mutex);
        if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e0f07fb6d56b..5eab400e2590 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -999,8 +999,8 @@ out:
        return rc;
 }
-int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
+static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
-                          struct kstat *stat)
+                                 struct kstat *stat)
 {
        struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
        int rc = 0;
@@ -1021,8 +1021,8 @@ int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
        return rc;
 }
-int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-                     struct kstat *stat)
+                            struct kstat *stat)
 {
        struct kstat lower_stat;
        int rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 2333203a120b..7d52806c2119 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1150,7 +1150,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
        struct ecryptfs_message *msg = NULL;
        char *auth_tok_sig;
        char *payload;
-        size_t payload_len;
+        size_t payload_len = 0;
        int rc;
        rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
@@ -1168,7 +1168,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
        rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
        if (rc) {
                ecryptfs_printk(KERN_ERR, "Error sending message to "
-                                "ecryptfsd\n");
+                                "ecryptfsd: %d\n", rc);
                goto out;
        }
        rc = ecryptfs_wait_for_response(msg_ctx, &msg);
@@ -1202,8 +1202,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
                                  crypt_stat->key_size);
        }
 out:
-        if (msg)
+        kfree(msg);
-                kfree(msg);
        return rc;
 }
@@ -1989,7 +1988,7 @@ pki_encrypt_session_key(struct key *auth_tok_key,
        rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
        if (rc) {
                ecryptfs_printk(KERN_ERR, "Error sending message to "
-                                "ecryptfsd\n");
+                                "ecryptfsd: %d\n", rc);
                goto out;
        }
        rc = ecryptfs_wait_for_response(msg_ctx, &msg);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 4e0886c9e5c4..e924cf45aad9 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -629,6 +629,7 @@ static struct file_system_type ecryptfs_fs_type = {
        .kill_sb = ecryptfs_kill_block_super,
        .fs_flags = 0
 };
+MODULE_ALIAS_FS("ecryptfs");
 /**
 * inode_info_init_once
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 8d7a577ae497..49ff8ea08f1c 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -97,8 +97,7 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
 void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
 {
        list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
-        if (msg_ctx->msg)
+        kfree(msg_ctx->msg);
-                kfree(msg_ctx->msg);
        msg_ctx->msg = NULL;
        msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
 }
@@ -283,7 +282,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
        int rc;
        rc = ecryptfs_find_daemon_by_euid(&daemon);
-        if (rc || !daemon) {
+        if (rc) {
                rc = -ENOTCONN;
                goto out;
        }
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 2002431ef9a0..c6f57a74a559 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -33,6 +33,7 @@ static struct file_system_type efs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("efs");
 static struct pt_types sgi_pt_types[] = {
        {0x00,          "SGI vh"},
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 5e59280d42d7..9d9763328734 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -1010,6 +1010,7 @@ static struct file_system_type exofs_type = {
        .mount          = exofs_mount,
        .kill_sb        = generic_shutdown_super,
 };
+MODULE_ALIAS_FS("exofs");
 static int __init init_exofs(void)
 {
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 8f370e012e61..7cadd823bb31 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -118,7 +118,6 @@ void ext2_free_inode (struct inode * inode)
         * as writing the quota to disk may need the lock as well.
         */
        /* Quota is already initialized in iput() */
-        ext2_xattr_delete_inode(inode);
        dquot_free_inode(inode);
        dquot_drop(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c3881e56662e..fe60cc1117d8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -34,6 +34,7 @@
 #include "ext2.h"
 #include "acl.h"
 #include "xip.h"
+#include "xattr.h"
 static int __ext2_write_inode(struct inode *inode, int do_sync);
@@ -88,6 +89,7 @@ void ext2_evict_inode(struct inode * inode)
                inode->i_size = 0;
                if (inode->i_blocks)
                        ext2_truncate_blocks(inode, 0);
+                ext2_xattr_delete_inode(inode);
        }
        invalidate_inode_buffers(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7f68c8114026..288534920fe5 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1536,6 +1536,7 @@ static struct file_system_type ext2_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ext2");
 static int __init init_ext2_fs(void)
 {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 5546ca225ffe..fb5120a5505c 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -353,7 +353,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
        return bdev;
 fail:
-        ext3_msg(sb, "error: failed to open journal device %s: %ld",
+        ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
                __bdevname(dev, b), PTR_ERR(bdev));
        return NULL;
@@ -887,7 +887,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
        /*todo: use simple_strtoll with >32bit ext3 */
        sb_block = simple_strtoul(options, &options, 0);
        if (*options && *options != ',') {
-                ext3_msg(sb, "error: invalid sb specification: %s",
+                ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
                       (char *) *data);
                return 1;
        }
@@ -3068,6 +3068,7 @@ static struct file_system_type ext3_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ext3");
 static int __init init_ext3_fs(void)
 {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4a01ba315262..3b83cd604796 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -335,9 +335,9 @@ struct ext4_group_desc
 */
 struct flex_groups {
-        atomic_t free_inodes;
+        atomic64_t      free_clusters;
-        atomic_t free_clusters;
+        atomic_t        free_inodes;
-        atomic_t used_dirs;
+        atomic_t        used_dirs;
 };
 #define EXT4_BG_INODE_UNINIT    0x0001 /* Inode table/bitmap not in use */
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 extern int __init ext4_init_pageio(void);
 extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
-extern void ext4_ioend_wait(struct inode *);
+extern void ext4_ioend_shutdown(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern void ext4_end_io_work(struct work_struct *work);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 28dd8eeea6a9..56efcaadf848 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
        unsigned short ext1_ee_len, ext2_ee_len, max_len;
        /*
-         * Make sure that either both extents are uninitialized, or
+         * Make sure that both extents are initialized. We don't merge
-         * both are _not_.
+         * uninitialized extents so that we can be sure that end_io code has
+         * the extent that was written properly split out and conversion to
+         * initialized is trivial.
         */
-        if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+        if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
                return 0;
        if (ext4_ext_is_uninitialized(ex1))
@@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
 {
        ext4_fsblk_t newblock;
        ext4_lblk_t ee_block;
-        struct ext4_extent *ex, newex, orig_ex;
+        struct ext4_extent *ex, newex, orig_ex, zero_ex;
        struct ext4_extent *ex2 = NULL;
        unsigned int ee_len, depth;
        int err = 0;
@@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle,
        newblock = split - ee_block + ext4_ext_pblock(ex);
        BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+        BUG_ON(!ext4_ext_is_uninitialized(ex) &&
+               split_flag & (EXT4_EXT_MAY_ZEROOUT |
+                             EXT4_EXT_MARK_UNINIT1 |
+                             EXT4_EXT_MARK_UNINIT2));
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
@@ -2990,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
        err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
        if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
                if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
-                        if (split_flag & EXT4_EXT_DATA_VALID1)
+                        if (split_flag & EXT4_EXT_DATA_VALID1) {
                                err = ext4_ext_zeroout(inode, ex2);
-                        else
+                                zero_ex.ee_block = ex2->ee_block;
+                                zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
+                                ext4_ext_store_pblock(&zero_ex,
+                                                      ext4_ext_pblock(ex2));
+                        } else {
                                err = ext4_ext_zeroout(inode, ex);
-                } else
+                                zero_ex.ee_block = ex->ee_block;
+                                zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+                                ext4_ext_store_pblock(&zero_ex,
+                                                      ext4_ext_pblock(ex));
+                        }
+                } else {
                        err = ext4_ext_zeroout(inode, &orig_ex);
+                        zero_ex.ee_block = orig_ex.ee_block;
+                        zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
+                        ext4_ext_store_pblock(&zero_ex,
+                                              ext4_ext_pblock(&orig_ex));
+                }
                if (err)
                        goto fix_extent_len;
@@ -3003,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
                ex->ee_len = cpu_to_le16(ee_len);
                ext4_ext_try_to_merge(handle, inode, path, ex);
                err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+                if (err)
+                        goto fix_extent_len;
+                /* update extent status tree */
+                err = ext4_es_zeroout(inode, &zero_ex);
                goto out;
        } else if (err)
                goto fix_extent_len;
@@ -3041,6 +3067,7 @@ static int ext4_split_extent(handle_t *handle,
        int err = 0;
        int uninitialized;
        int split_flag1, flags1;
+        int allocated = map->m_len;
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
@@ -3060,20 +3087,29 @@ static int ext4_split_extent(handle_t *handle,
                                map->m_lblk + map->m_len, split_flag1, flags1);
                if (err)
                        goto out;
+        } else {
+                allocated = ee_len - (map->m_lblk - ee_block);
        }
+        /*
+         * Update path is required because previous ext4_split_extent_at() may
+         * result in split of original leaf or extent zeroout.
+         */
        ext4_ext_drop_refs(path);
        path = ext4_ext_find_extent(inode, map->m_lblk, path);
        if (IS_ERR(path))
                return PTR_ERR(path);
+        depth = ext_depth(inode);
+        ex = path[depth].p_ext;
+        uninitialized = ext4_ext_is_uninitialized(ex);
+        split_flag1 = 0;
        if (map->m_lblk >= ee_block) {
-                split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
+                split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
-                                            EXT4_EXT_DATA_VALID2);
+                if (uninitialized) {
-                if (uninitialized)
                        split_flag1 |= EXT4_EXT_MARK_UNINIT1;
-                if (split_flag & EXT4_EXT_MARK_UNINIT2)
+                        split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
-                        split_flag1 |= EXT4_EXT_MARK_UNINIT2;
+                                                     EXT4_EXT_MARK_UNINIT2);
+                }
                err = ext4_split_extent_at(handle, inode, path,
                                map->m_lblk, split_flag1, flags);
                if (err)
@@ -3082,7 +3118,7 @@ static int ext4_split_extent(handle_t *handle,
        ext4_ext_show_leaf(inode, path);
 out:
-        return err ? err : map->m_len;
+        return err ? err : allocated;
 }
 /*
@@ -3137,6 +3173,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
        allocated = ee_len - (map->m_lblk - ee_block);
+        zero_ex.ee_len = 0;
        trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
@@ -3227,13 +3264,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
        if (EXT4_EXT_MAY_ZEROOUT & split_flag)
                max_zeroout = sbi->s_extent_max_zeroout_kb >>
-                        inode->i_sb->s_blocksize_bits;
+                        (inode->i_sb->s_blocksize_bits - 10);
        /* If extent is less than s_max_zeroout_kb, zeroout directly */
        if (max_zeroout && (ee_len <= max_zeroout)) {
                err = ext4_ext_zeroout(inode, ex);
                if (err)
                        goto out;
+                zero_ex.ee_block = ex->ee_block;
+                zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+                ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
@@ -3292,6 +3332,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                err = allocated;
 out:
+        /* If we have gotten a failure, don't zero out status tree */
+        if (!err)
+                err = ext4_es_zeroout(inode, &zero_ex);
        return err ? err : allocated;
 }
@@ -3374,8 +3417,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                "block %llu, max_blocks %u\n", inode->i_ino,
                  (unsigned long long)ee_block, ee_len);
-        /* If extent is larger than requested then split is required */
+        /* If extent is larger than requested it is a clear sign that we still
+         * have some extent state machine issues left. So extent_split is still
+         * required.
+         * TODO: Once all related issues are fixed this situation should be
+         * illegal.
+         */
        if (ee_block != map->m_lblk || ee_len > map->m_len) {
+#ifdef EXT4_DEBUG
+                ext4_warning("Inode (%ld) finished: extent logical block %llu,"
+                             " len %u; IO logical block %llu, len %u\n",
+                             inode->i_ino, (unsigned long long)ee_block, ee_len,
+                             (unsigned long long)map->m_lblk, map->m_len);
+#endif
                err = ext4_split_unwritten_extents(handle, inode, map, path,
                                                   EXT4_GET_BLOCKS_CONVERT);
                if (err < 0)
@@ -3626,6 +3680,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                                                 path, map->m_len);
                } else
                        err = ret;
+                map->m_flags |= EXT4_MAP_MAPPED;
+                if (allocated > map->m_len)
+                        allocated = map->m_len;
+                map->m_len = allocated;
                goto out2;
        }
        /* buffered IO case */
@@ -3675,6 +3733,7 @@ out:
                                        allocated - map->m_len);
                allocated = map->m_len;
        }
+        map->m_len = allocated;
        /*
         * If we have done fallocate with the offset that is already
@@ -4106,9 +4165,6 @@ got_allocated_blocks:
                        }
                } else {
                        BUG_ON(allocated_clusters < reserved_clusters);
-                        /* We will claim quota for all newly allocated blocks.*/
-                        ext4_da_update_reserve_space(inode, allocated_clusters,
-                                                        1);
                        if (reserved_clusters < allocated_clusters) {
                                struct ext4_inode_info *ei = EXT4_I(inode);
                                int reservation = allocated_clusters -
@@ -4159,6 +4215,15 @@ got_allocated_blocks:
                                ei->i_reserved_data_blocks += reservation;
                                spin_unlock(&ei->i_block_reservation_lock);
                        }
+                        /*
+                         * We will claim quota for all newly allocated blocks.
+                         * We're updating the reserved space *after* the
+                         * correction above so we do not accidentally free
+                         * all the metadata reservation because we might
+                         * actually need it later on.
+                         */
+                        ext4_da_update_reserve_space(inode, allocated_clusters,
+                                                        1);
                }
        }
@@ -4368,8 +4433,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (len <= EXT_UNINIT_MAX_LEN << blkbits)
                flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
-        /* Prevent race condition between unwritten */
-        ext4_flush_unwritten_io(inode);
 retry:
        while (ret >= 0 && ret < max_blocks) {
                map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 95796a1b7522..fe3337a85ede 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 static int ext4_es_can_be_merged(struct extent_status *es1,
                                 struct extent_status *es2)
 {
-        if (es1->es_lblk + es1->es_len != es2->es_lblk)
+        if (ext4_es_status(es1) != ext4_es_status(es2))
                return 0;
-        if (ext4_es_status(es1) != ext4_es_status(es2))
+        if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
                return 0;
-        if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
+        if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
-            (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
                return 0;
-        return 1;
+        if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
+            (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
+                return 1;
+        if (ext4_es_is_hole(es1))
+                return 1;
+        /* we need to check delayed extent is without unwritten status */
+        if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
+                return 1;
+        return 0;
 }
 static struct extent_status *
@@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
        return es;
 }
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_es_insert_extent_ext_check(struct inode *inode,
+                                            struct extent_status *es)
+{
+        struct ext4_ext_path *path = NULL;
+        struct ext4_extent *ex;
+        ext4_lblk_t ee_block;
+        ext4_fsblk_t ee_start;
+        unsigned short ee_len;
+        int depth, ee_status, es_status;
+        path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+        if (IS_ERR(path))
+                return;
+        depth = ext_depth(inode);
+        ex = path[depth].p_ext;
+        if (ex) {
+                ee_block = le32_to_cpu(ex->ee_block);
+                ee_start = ext4_ext_pblock(ex);
+                ee_len = ext4_ext_get_actual_len(ex);
+                ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
+                es_status = ext4_es_is_unwritten(es) ? 1 : 0;
+                /*
+                 * Make sure ex and es do not overlap when we try to insert
+                 * a delayed/hole extent.
+                 */
+                if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
+                        if (in_range(es->es_lblk, ee_block, ee_len)) {
+                                pr_warn("ES insert assertation failed for "
+                                        "inode: %lu we can find an extent "
+                                        "at block [%d/%d/%llu/%c], but we "
+                                        "want to add an delayed/hole extent "
+                                        "[%d/%d/%llu/%llx]\n",
+                                        inode->i_ino, ee_block, ee_len,
+                                        ee_start, ee_status ? 'u' : 'w',
+                                        es->es_lblk, es->es_len,
+                                        ext4_es_pblock(es), ext4_es_status(es));
+                        }
+                        goto out;
+                }
+                /*
+                 * We don't check ee_block == es->es_lblk, etc. because es
+                 * might be a part of the whole extent, or vice versa.
+                 */
+                if (es->es_lblk < ee_block ||
+                    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
+                        pr_warn("ES insert assertation failed for inode: %lu "
+                                "ex_status [%d/%d/%llu/%c] != "
+                                "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
+                                ee_block, ee_len, ee_start,
+                                ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
+                                ext4_es_pblock(es), es_status ? 'u' : 'w');
+                        goto out;
+                }
+                if (ee_status ^ es_status) {
+                        pr_warn("ES insert assertation failed for inode: %lu "
+                                "ex_status [%d/%d/%llu/%c] != "
+                                "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
+                                ee_block, ee_len, ee_start,
+                                ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
+                                ext4_es_pblock(es), es_status ? 'u' : 'w');
+                }
+        } else {
+                /*
+                 * We can't find an extent on disk.  So we need to make sure
+                 * that we are not adding a written/unwritten extent.
+                 */
+                if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
+                        pr_warn("ES insert assertation failed for inode: %lu "
+                                "can't find an extent at block %d but we want "
+                                "to add an written/unwritten extent "
+                                "[%d/%d/%llu/%llx]\n", inode->i_ino,
+                                es->es_lblk, es->es_lblk, es->es_len,
+                                ext4_es_pblock(es), ext4_es_status(es));
+                }
+        }
+out:
+        if (path) {
+                ext4_ext_drop_refs(path);
+                kfree(path);
+        }
+}
+static void ext4_es_insert_extent_ind_check(struct inode *inode,
+                                            struct extent_status *es)
+{
+        struct ext4_map_blocks map;
+        int retval;
+        /*
+         * Here we call ext4_ind_map_blocks to lookup a block mapping because
+         * 'Indirect' structure is defined in indirect.c.  So we couldn't
+         * access direct/indirect tree from outside.  It is too dirty to define
+         * this function in indirect.c file.
+         */
+        map.m_lblk = es->es_lblk;
+        map.m_len = es->es_len;
+        retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
+        if (retval > 0) {
+                if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
+                        /*
+                         * We want to add a delayed/hole extent but this
+                         * block has been allocated.
+                         */
+                        pr_warn("ES insert assertation failed for inode: %lu "
+                                "We can find blocks but we want to add a "
+                                "delayed/hole extent [%d/%d/%llu/%llx]\n",
+                                inode->i_ino, es->es_lblk, es->es_len,
+                                ext4_es_pblock(es), ext4_es_status(es));
+                        return;
+                } else if (ext4_es_is_written(es)) {
+                        if (retval != es->es_len) {
+                                pr_warn("ES insert assertation failed for "
+                                        "inode: %lu retval %d != es_len %d\n",
+                                        inode->i_ino, retval, es->es_len);
+                                return;
+                        }
+                        if (map.m_pblk != ext4_es_pblock(es)) {
+                                pr_warn("ES insert assertation failed for "
+                                        "inode: %lu m_pblk %llu != "
+                                        "es_pblk %llu\n",
+                                        inode->i_ino, map.m_pblk,
+                                        ext4_es_pblock(es));
+                                return;
+                        }
+                } else {
+                        /*
+                         * We don't need to check unwritten extent because
+                         * indirect-based file doesn't have it.
+                         */
+                        BUG_ON(1);
+                }
+        } else if (retval == 0) {
+                if (ext4_es_is_written(es)) {
+                        pr_warn("ES insert assertation failed for inode: %lu "
+                                "We can't find the block but we want to add "
+                                "an written extent [%d/%d/%llu/%llx]\n",
+                                inode->i_ino, es->es_lblk, es->es_len,
+                                ext4_es_pblock(es), ext4_es_status(es));
+                        return;
+                }
+        }
+}
+static inline void ext4_es_insert_extent_check(struct inode *inode,
+                                               struct extent_status *es)
+{
+        /*
+         * We don't need to worry about the race condition because
+         * caller takes i_data_sem locking.
+         */
+        BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
+        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+                ext4_es_insert_extent_ext_check(inode, es);
+        else
+                ext4_es_insert_extent_ind_check(inode, es);
+}
+#else
+static inline void ext4_es_insert_extent_check(struct inode *inode,
+                                               struct extent_status *es)
+{
+}
+#endif
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
 {
        struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
@@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
        ext4_es_store_status(&newes, status);
        trace_ext4_es_insert_extent(inode, &newes);
+        ext4_es_insert_extent_check(inode, &newes);
        write_lock(&EXT4_I(inode)->i_es_lock);
        err = __es_remove_extent(inode, lblk, end);
        if (err != 0)
@@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
        return err;
 }
+int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+        ext4_lblk_t  ee_block;
+        ext4_fsblk_t ee_pblock;
+        unsigned int ee_len;
+        ee_block  = le32_to_cpu(ex->ee_block);
+        ee_len    = ext4_ext_get_actual_len(ex);
+        ee_pblock = ext4_ext_pblock(ex);
+        if (ee_len == 0)
+                return 0;
+        return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+                                     EXTENT_STATUS_WRITTEN);
+}
 static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct ext4_sb_info *sbi = container_of(shrink,
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f190dfe969da..d8e2d4dc311e 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -21,6 +21,12 @@
 #endif
 /*
+ * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
+ * checked with old map_block's result.
+ */
+#define ES_AGGRESSIVE_TEST__
+/*
 * These flags live in the high bits of extent_status.es_pblk
 */
 #define EXTENT_STATUS_WRITTEN   (1ULL << 63)
@@ -33,6 +39,8 @@
                                 EXTENT_STATUS_DELAYED | \
                                 EXTENT_STATUS_HOLE)
+struct ext4_extent;
 struct extent_status {
        struct rb_node rb_node;
        ext4_lblk_t es_lblk;    /* first logical block extent covers */
@@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
                                        struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
                                 struct extent_status *es);
+extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
 static inline int ext4_es_is_written(struct extent_status *es)
 {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 32fd2b9075dd..6c5bb8d993fe 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -324,8 +324,8 @@ error_return:
 }
 struct orlov_stats {
+        __u64 free_clusters;
        __u32 free_inodes;
-        __u32 free_clusters;
        __u32 used_dirs;
 };
@@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
        if (flex_size > 1) {
                stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
-                stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
+                stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
                stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
                return;
        }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde3fa9e..b3a5213bc73e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
        trace_ext4_evict_inode(inode);
-        ext4_ioend_wait(inode);
        if (inode->i_nlink) {
                /*
                 * When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
                 * don't use page cache.
                 */
                if (ext4_should_journal_data(inode) &&
-                    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+                    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+                    inode->i_ino != EXT4_JOURNAL_INO) {
                        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
                        tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
                        filemap_write_and_wait(&inode->i_data);
                }
                truncate_inode_pages(&inode->i_data, 0);
+                ext4_ioend_shutdown(inode);
                goto no_delete;
        }
@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
        if (ext4_should_order_data(inode))
                ext4_begin_ordered_truncate(inode, 0);
        truncate_inode_pages(&inode->i_data, 0);
+        ext4_ioend_shutdown(inode);
        if (is_bad_inode(inode))
                goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
        return num;
 }
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_map_blocks_es_recheck(handle_t *handle,
+                                       struct inode *inode,
+                                       struct ext4_map_blocks *es_map,
+                                       struct ext4_map_blocks *map,
+                                       int flags)
+{
+        int retval;
+        map->m_flags = 0;
+        /*
+         * There is a race window in which the result is not the same,
+         * e.g. xfstests #223 when dioread_nolock is enabled.  The reason
+         * is that we look up a block mapping in the extent status tree
+         * without taking i_data_sem.  So in the meantime the unwritten
+         * extent could have been converted.
+         */
+        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+                down_read((&EXT4_I(inode)->i_data_sem));
+        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+                retval = ext4_ext_map_blocks(handle, inode, map, flags &
+                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+        } else {
+                retval = ext4_ind_map_blocks(handle, inode, map, flags &
+                                             EXT4_GET_BLOCKS_KEEP_SIZE);
+        }
+        if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+                up_read((&EXT4_I(inode)->i_data_sem));
+        /*
+         * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
+         * because it shouldn't be marked in es_map->m_flags.
+         */
+        map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
+        /*
+         * We don't check m_len because extents will be collapsed in the
+         * status tree.  So the m_len might not be equal.
+         */
+        if (es_map->m_lblk != map->m_lblk ||
+            es_map->m_flags != map->m_flags ||
+            es_map->m_pblk != map->m_pblk) {
+                printk("ES cache assertation failed for inode: %lu "
+                       "es_cached ex [%d/%d/%llu/%x] != "
+                       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
+                       inode->i_ino, es_map->m_lblk, es_map->m_len,
+                       es_map->m_pblk, es_map->m_flags, map->m_lblk,
+                       map->m_len, map->m_pblk, map->m_flags,
+                       retval, flags);
+        }
+}
+#endif /* ES_AGGRESSIVE_TEST */
 /*
 * The ext4_map_blocks() function tries to look up the requested blocks,
 * and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 {
        struct extent_status es;
        int retval;
+#ifdef ES_AGGRESSIVE_TEST
+        struct ext4_map_blocks orig_map;
+        memcpy(&orig_map, map, sizeof(*map));
+#endif
        map->m_flags = 0;
        ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                } else {
                        BUG_ON(1);
                }
+#ifdef ES_AGGRESSIVE_TEST
+                ext4_map_blocks_es_recheck(handle, inode, map,
+                                           &orig_map, flags);
+#endif
                goto found;
        }
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                int ret;
                unsigned long long status;
+#ifdef ES_AGGRESSIVE_TEST
+                if (retval != map->m_len) {
+                        printk("ES len assertation failed for inode: %lu "
+                               "retval %d != map->m_len %d "
+                               "in %s (lookup)\n", inode->i_ino, retval,
+                               map->m_len, __func__);
+                }
+#endif
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ found:
                int ret;
                unsigned long long status;
+#ifdef ES_AGGRESSIVE_TEST
+                if (retval != map->m_len) {
+                        printk("ES len assertation failed for inode: %lu "
+                               "retval %d != map->m_len %d "
+                               "in %s (allocation)\n", inode->i_ino, retval,
+                               map->m_len, __func__);
+                }
+#endif
+                /*
+                 * If the extent has been zeroed out, we don't need to update
+                 * extent status tree.
+                 */
+                if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
+                    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+                        if (ext4_es_is_written(&es))
+                                goto has_zeroout;
+                }
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ found:
                        retval = ret;
        }
+has_zeroout:
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
                int ret = check_block_validity(inode, map);
@@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file,
 }
 /*
+ * Reserve metadata for a single block located at lblock
+ */
+static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
+{
+        int retries = 0;
+        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        unsigned int md_needed;
+        ext4_lblk_t save_last_lblock;
+        int save_len;
+        /*
+         * recalculate the amount of metadata blocks to reserve
+         * in order to allocate nrblocks
+         * worst case is one extent per block
+         */
+repeat:
+        spin_lock(&ei->i_block_reservation_lock);
+        /*
+         * ext4_calc_metadata_amount() has side effects, which we have
+         * to be prepared to undo if we fail to claim space.
+         */
+        save_len = ei->i_da_metadata_calc_len;
+        save_last_lblock = ei->i_da_metadata_calc_last_lblock;
+        md_needed = EXT4_NUM_B2C(sbi,
+                                 ext4_calc_metadata_amount(inode, lblock));
+        trace_ext4_da_reserve_space(inode, md_needed);
+        /*
+         * We do still charge estimated metadata to the sb though;
+         * we cannot afford to run out of free blocks.
+         */
+        if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
+                ei->i_da_metadata_calc_len = save_len;
+                ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+                spin_unlock(&ei->i_block_reservation_lock);
+                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+                        cond_resched();
+                        goto repeat;
+                }
+                return -ENOSPC;
+        }
+        ei->i_reserved_meta_blocks += md_needed;
+        spin_unlock(&ei->i_block_reservation_lock);
+        return 0;       /* success */
+}
+/*
 * Reserve a single cluster located at lblock
 */
 static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1263,7 +1402,7 @@ repeat:
                ei->i_da_metadata_calc_last_lblock = save_last_lblock;
                spin_unlock(&ei->i_block_reservation_lock);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-                        yield();
+                        cond_resched();
                        goto repeat;
                }
                dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
        struct extent_status es;
        int retval;
        sector_t invalid_block = ~((sector_t) 0xffff);
+#ifdef ES_AGGRESSIVE_TEST
+        struct ext4_map_blocks orig_map;
+        memcpy(&orig_map, map, sizeof(*map));
+#endif
        if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
                invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                else
                        BUG_ON(1);
+#ifdef ES_AGGRESSIVE_TEST
+                ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
+#endif
                return retval;
        }
@@ -1843,8 +1990,11 @@ add_delayed:
                 * XXX: __block_prepare_write() unmaps passed block,
                 * is it OK?
                 */
-                /* If the block was allocated from previously allocated cluster,
+                /*
-                 * then we dont need to reserve it again. */
+                 * If the block was allocated from previously allocated cluster,
+                 * then we don't need to reserve it again. However we still need
+                 * to reserve metadata for every block we're going to write.
+                 */
                if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
                        ret = ext4_da_reserve_space(inode, iblock);
                        if (ret) {
@@ -1852,6 +2002,13 @@ add_delayed:
                                retval = ret;
                                goto out_unlock;
                        }
+                } else {
+                        ret = ext4_da_reserve_metadata(inode, iblock);
+                        if (ret) {
+                                /* not enough space to reserve */
+                                retval = ret;
+                                goto out_unlock;
+                        }
                }
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ add_delayed:
                int ret;
                unsigned long long status;
+#ifdef ES_AGGRESSIVE_TEST
+                if (retval != map->m_len) {
+                        printk("ES len assertation failed for inode: %lu "
+                               "retval %d != map->m_len %d "
+                               "in %s (lookup)\n", inode->i_ino, retval,
+                               map->m_len, __func__);
+                }
+#endif
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
                ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
        trace_ext4_releasepage(page);
-        WARN_ON(PageChecked(page));
+        /* Page has dirty journalled data -> cannot release */
-        if (!page_has_buffers(page))
+        if (PageChecked(page))
                return 0;
        if (journal)
                return jbd2_journal_try_to_free_buffers(journal, page, wait);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7bb713a46fe4..ee6614bdb639 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi,
                                                          ac->ac_b_ex.fe_group);
-                atomic_sub(ac->ac_b_ex.fe_len,
+                atomic64_sub(ac->ac_b_ex.fe_len,
-                           &sbi->s_flex_groups[flex_group].free_clusters);
+                             &sbi->s_flex_groups[flex_group].free_clusters);
        }
        err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3692,11 +3692,7 @@ repeat:
        if (free < needed && busy) {
                busy = 0;
                ext4_unlock_group(sb, group);
-                /*
+                cond_resched();
-                 * Yield the CPU here so that we don't get soft lockup
-                 * in non preempt case.
-                 */
-                yield();
                goto repeat;
        }
@@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                        ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
                        /* let others to free the space */
-                        yield();
+                        cond_resched();
                        ar->len = ar->len >> 1;
                }
                if (!ar->len) {
@@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
        struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
        struct ext4_group_desc *gdp;
-        unsigned long freed = 0;
        unsigned int overflow;
        ext4_grpblk_t bit;
        struct buffer_head *gd_bh;
@@ -4666,14 +4661,12 @@ do_more:
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-                atomic_add(count_clusters,
+                atomic64_add(count_clusters,
-                           &sbi->s_flex_groups[flex_group].free_clusters);
+                             &sbi->s_flex_groups[flex_group].free_clusters);
        }
        ext4_mb_unload_buddy(&e4b);
-        freed += count;
        if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
                dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
@@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-                atomic_add(EXT4_NUM_B2C(sbi, blocks_freed),
+                atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
-                           &sbi->s_flex_groups[flex_group].free_clusters);
+                             &sbi->s_flex_groups[flex_group].free_clusters);
        }
        ext4_mb_unload_buddy(&e4b);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 4e81d47aa8cb..33e1c086858b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -32,16 +32,18 @@
 */
 static inline int
 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
-                struct ext4_ext_path **path)
+                struct ext4_ext_path **orig_path)
 {
        int ret = 0;
+        struct ext4_ext_path *path;
-        *path = ext4_ext_find_extent(inode, lblock, *path);
+        path = ext4_ext_find_extent(inode, lblock, *orig_path);
-        if (IS_ERR(*path)) {
+        if (IS_ERR(path))
-                ret = PTR_ERR(*path);
+                ret = PTR_ERR(path);
-                *path = NULL;
+        else if (path[ext_depth(inode)].p_ext == NULL)
-        } else if ((*path)[ext_depth(inode)].p_ext == NULL)
                ret = -ENODATA;
+        else
+                *orig_path = path;
        return ret;
 }
@@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
 {
        struct ext4_ext_path *path = NULL;
        struct ext4_extent *ext;
+        int ret = 0;
        ext4_lblk_t last = from + count;
        while (from < last) {
                *err = get_ext_path(inode, from, &path);
                if (*err)
-                        return 0;
+                        goto out;
                ext = path[ext_depth(inode)].p_ext;
-                if (!ext) {
+                if (uninit != ext4_ext_is_uninitialized(ext))
-                        ext4_ext_drop_refs(path);
+                        goto out;
-                        return 0;
-                }
-                if (uninit != ext4_ext_is_uninitialized(ext)) {
-                        ext4_ext_drop_refs(path);
-                        return 0;
-                }
                from += ext4_ext_get_actual_len(ext);
                ext4_ext_drop_refs(path);
        }
-        return 1;
+        ret = 1;
+out:
+        if (path) {
+                ext4_ext_drop_refs(path);
+                kfree(path);
+        }
+        return ret;
 }
 /**
@@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
        int replaced_count = 0;
        int dext_alen;
+        *err = ext4_es_remove_extent(orig_inode, from, count);
+        if (*err)
+                goto out;
+        *err = ext4_es_remove_extent(donor_inode, from, count);
+        if (*err)
+                goto out;
        /* Get the original extent for the block "orig_off" */
        *err = get_ext_path(orig_inode, orig_off, &orig_path);
        if (*err)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b31003ecc..047a6de04a0a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void)
        kmem_cache_destroy(io_page_cachep);
 }
-void ext4_ioend_wait(struct inode *inode)
+/*
+ * This function is called by ext4_evict_inode() to make sure there is
+ * no more pending I/O completion work left to do.
+ */
+void ext4_ioend_shutdown(struct inode *inode)
 {
        wait_queue_head_t *wq = ext4_ioend_wq(inode);
        wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+        /*
+         * We need to make sure the work structure is finished being
+         * used before we let the inode get destroyed.
+         */
+        if (work_pending(&EXT4_I(inode)->i_unwritten_work))
+                cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
 static void put_io_page(struct ext4_io_page *io_page)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b2c8ee56eb98..c169477a62c9 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb,
            sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group;
                flex_group = ext4_flex_group(sbi, group_data[0].group);
-                atomic_add(EXT4_NUM_B2C(sbi, free_blocks),
+                atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
-                           &sbi->s_flex_groups[flex_group].free_clusters);
+                             &sbi->s_flex_groups[flex_group].free_clusters);
                atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
                           &sbi->s_flex_groups[flex_group].free_inodes);
        }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5e6c87836193..5d6d53578124 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -90,6 +90,8 @@ static struct file_system_type ext2_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ext2");
+MODULE_ALIAS("ext2");
 #define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 #else
 #define IS_EXT2_SB(sb) (0)
@@ -104,6 +106,8 @@ static struct file_system_type ext3_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ext3");
+MODULE_ALIAS("ext3");
 #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 #else
 #define IS_EXT3_SB(sb) (0)
@@ -1923,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
                flex_group = ext4_flex_group(sbi, i);
                atomic_add(ext4_free_inodes_count(sb, gdp),
                           &sbi->s_flex_groups[flex_group].free_inodes);
-                atomic_add(ext4_free_group_clusters(sb, gdp),
+                atomic64_add(ext4_free_group_clusters(sb, gdp),
-                           &sbi->s_flex_groups[flex_group].free_clusters);
+                             &sbi->s_flex_groups[flex_group].free_clusters);
                atomic_add(ext4_used_dirs_count(sb, gdp),
                           &sbi->s_flex_groups[flex_group].used_dirs);
        }
@@ -5152,7 +5156,6 @@ static inline int ext2_feature_set_ok(struct super_block *sb)
                return 0;
        return 1;
 }
-MODULE_ALIAS("ext2");
 #else
 static inline void register_as_ext2(void) { }
 static inline void unregister_as_ext2(void) { }
@@ -5185,7 +5188,6 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
                return 0;
        return 1;
 }
-MODULE_ALIAS("ext3");
 #else
 static inline void register_as_ext3(void) { }
 static inline void unregister_as_ext3(void) { }
@@ -5199,6 +5201,7 @@ static struct file_system_type ext4_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ext4");
 static int __init ext4_init_feat_adverts(void)
 {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8c117649a035..fea6e582a2ed 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -687,6 +687,7 @@ static struct file_system_type f2fs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("f2fs");
 static int __init init_inodecache(void)
 {
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index e2cfda94a28d..081b759cff83 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -668,6 +668,7 @@ static struct file_system_type msdos_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("msdos");
 static int __init init_msdos_fs(void)
 {
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index ac959d655e7d..2da952036a3d 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1073,6 +1073,7 @@ static struct file_system_type vfat_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("vfat");
 static int __init init_vfat_fs(void)
 {
diff --git a/fs/filesystems.c b/fs/filesystems.c
index da165f6adcbf..92567d95ba6a 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -273,7 +273,7 @@ struct file_system_type *get_fs_type(const char *name)
        int len = dot ? dot - name : strlen(name);
        fs = __get_fs_type(name, len);
-        if (!fs && (request_module("%.*s", len, name) == 0))
+        if (!fs && (request_module("fs-%.*s", len, name) == 0))
                fs = __get_fs_type(name, len);
        if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index fed2c8afb3a9..e37eb274e492 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -52,7 +52,6 @@ MODULE_AUTHOR("Christoph Hellwig");
 MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver");
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_ALIAS("vxfs"); /* makes mount -t vxfs autoload the module */
 static void             vxfs_put_super(struct super_block *);
@@ -258,6 +257,8 @@ static struct file_system_type vxfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("vxfs"); /* makes mount -t vxfs autoload the module */
+MODULE_ALIAS("vxfs");
 static int __init
 vxfs_init(void)
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index b7978b9f75ef..a0b0855d00a9 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -341,6 +341,7 @@ static struct file_system_type fuse_ctl_fs_type = {
        .mount          = fuse_ctl_mount,
        .kill_sb        = fuse_ctl_kill_sb,
 };
+MODULE_ALIAS_FS("fusectl");
 int __init fuse_ctl_init(void)
 {
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index df00993ed108..137185c3884f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1117,6 +1117,7 @@ static struct file_system_type fuse_fs_type = {
        .mount          = fuse_mount,
        .kill_sb        = fuse_kill_sb_anon,
 };
+MODULE_ALIAS_FS("fuse");
 #ifdef CONFIG_BLOCK
 static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
@@ -1146,6 +1147,7 @@ static struct file_system_type fuseblk_fs_type = {
        .kill_sb        = fuse_kill_sb_blk,
        .fs_flags       = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
+MODULE_ALIAS_FS("fuseblk");
 static inline int register_fuseblk(void)
 {
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1b612be4b873..60ede2a0f43f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -20,6 +20,7 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/quotaops.h>
 #include <linux/lockdep.h>
+#include <linux/module.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -1425,6 +1426,7 @@ struct file_system_type gfs2_fs_type = {
        .kill_sb = gfs2_kill_sb,
        .owner = THIS_MODULE,
 };
+MODULE_ALIAS_FS("gfs2");
 struct file_system_type gfs2meta_fs_type = {
        .name = "gfs2meta",
@@ -1432,4 +1434,4 @@ struct file_system_type gfs2meta_fs_type = {
        .mount = gfs2_mount_meta,
        .owner = THIS_MODULE,
 };
+MODULE_ALIAS_FS("gfs2meta");
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index e93ddaadfd1e..bbaaa8a4ee64 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -466,6 +466,7 @@ static struct file_system_type hfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("hfs");
 static void hfs_init_once(void *p)
 {
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 974c26f96fae..7b87284e46dc 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -654,6 +654,7 @@ static struct file_system_type hfsplus_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("hfsplus");
 static void hfsplus_init_once(void *p)
 {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fbabb906066f..0f6e52d22b84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -845,15 +845,8 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
                return err;
        if ((attr->ia_valid & ATTR_SIZE) &&
-            attr->ia_size != i_size_read(inode)) {
+            attr->ia_size != i_size_read(inode))
-                int error;
-                error = inode_newsize_ok(inode, attr->ia_size);
-                if (error)
-                        return error;
                truncate_setsize(inode, attr->ia_size);
-        }
        setattr_copy(inode, attr);
        mark_inode_dirty(inode);
@@ -993,6 +986,7 @@ static struct file_system_type hostfs_type = {
        .kill_sb        = hostfs_kill_sb,
        .fs_flags       = 0,
 };
+MODULE_ALIAS_FS("hostfs");
 static int __init init_hostfs(void)
 {
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a3076228523d..a0617e706957 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -688,6 +688,7 @@ static struct file_system_type hpfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("hpfs");
 static int __init init_hpfs_fs(void)
 {
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 74f55703be49..126d3c2e2dee 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,6 +748,7 @@ static struct file_system_type hppfs_type = {
        .kill_sb        = kill_anon_super,
        .fs_flags       = 0,
 };
+MODULE_ALIAS_FS("hppfs");
 static int __init init_hppfs(void)
 {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7f94e0cbc69c..84e3d856e91d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -896,6 +896,7 @@ static struct file_system_type hugetlbfs_fs_type = {
        .mount          = hugetlbfs_mount,
        .kill_sb        = kill_litter_super,
 };
+MODULE_ALIAS_FS("hugetlbfs");
 static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE];
diff --git a/fs/internal.h b/fs/internal.h
index 507141fceb99..4be78237d896 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool);
 * dcache.c
 */
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
+/*
+ * read_write.c
+ */
+extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 67ce52507d7d..d9b8aebdeb22 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1556,6 +1556,8 @@ static struct file_system_type iso9660_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("iso9660");
+MODULE_ALIAS("iso9660");
 static int __init init_iso9660_fs(void)
 {
@@ -1593,5 +1595,3 @@ static void __exit exit_iso9660_fs(void)
 module_init(init_iso9660_fs)
 module_exit(exit_iso9660_fs)
 MODULE_LICENSE("GPL");
-/* Actual filesystem name is iso9660, as requested in filesystems.c */
-MODULE_ALIAS("iso9660");
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d6ee5aed56b1..325bc019ed88 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1065,9 +1065,12 @@ out:
 void jbd2_journal_set_triggers(struct buffer_head *bh,
                               struct jbd2_buffer_trigger_type *type)
 {
-        struct journal_head *jh = bh2jh(bh);
+        struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
+        if (WARN_ON(!jh))
+                return;
        jh->b_triggers = type;
+        jbd2_journal_put_journal_head(jh);
 }
 void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
@@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
-        struct journal_head *jh = bh2jh(bh);
+        struct journal_head *jh;
        int ret = 0;
-        jbd_debug(5, "journal_head %p\n", jh);
-        JBUFFER_TRACE(jh, "entry");
        if (is_handle_aborted(handle))
                goto out;
-        if (!buffer_jbd(bh)) {
+        jh = jbd2_journal_grab_journal_head(bh);
+        if (!jh) {
                ret = -EUCLEAN;
                goto out;
        }
+        jbd_debug(5, "journal_head %p\n", jh);
+        JBUFFER_TRACE(jh, "entry");
        jbd_lock_bh_state(bh);
@@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
        jbd_unlock_bh_state(bh);
+        jbd2_journal_put_journal_head(jh);
 out:
        JBUFFER_TRACE(jh, "exit");
        WARN_ON(ret);   /* All errors are bugs, so dump the stack */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d3d8799e2187..0defb1cc2a35 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -356,6 +356,7 @@ static struct file_system_type jffs2_fs_type = {
        .mount =        jffs2_mount,
        .kill_sb =      jffs2_kill_sb,
 };
+MODULE_ALIAS_FS("jffs2");
 static int __init init_jffs2_fs(void)
 {
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 060ba638becb..2003e830ed1c 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -833,6 +833,7 @@ static struct file_system_type jfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("jfs");
 static void init_once(void *foo)
 {
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 345c24b8a6f8..54360293bcb5 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -608,6 +608,7 @@ static struct file_system_type logfs_fs_type = {
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("logfs");
 static int __init logfs_init(void)
 {
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 99541cceb584..df122496f328 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -660,6 +660,7 @@ static struct file_system_type minix_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("minix");
 static int __init init_minix_fs(void)
 {
diff --git a/fs/namei.c b/fs/namei.c
index 961bc1268366..57ae9c8c66bf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -689,8 +689,6 @@ void nd_jump_link(struct nameidata *nd, struct path *path)
        nd->path = *path;
        nd->inode = nd->path.dentry->d_inode;
        nd->flags |= LOOKUP_JUMPED;
-        BUG_ON(nd->inode->i_op->follow_link);
 }
 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ca17d3cb45..d581e45c0a9f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
        }
        mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+        /* Don't allow unprivileged users to change mount flags */
+        if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
+                mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
        atomic_inc(&sb->s_active);
        mnt->mnt.mnt_sb = sb;
        mnt->mnt.mnt_root = dget(root);
@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
        if (readonly_request == __mnt_is_readonly(mnt))
                return 0;
+        if (mnt->mnt_flags & MNT_LOCK_READONLY)
+                return -EPERM;
        if (readonly_request)
                error = mnt_make_readonly(real_mount(mnt));
        else
@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        /* First pass: copy the tree topology */
        copy_flags = CL_COPY_ALL | CL_EXPIRE;
        if (user_ns != mnt_ns->user_ns)
-                copy_flags |= CL_SHARED_TO_SLAVE;
+                copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
        new = copy_tree(old, old->mnt.mnt_root, copy_flags);
        if (IS_ERR(new)) {
                up_write(&namespace_sem);
@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
        return check_mnt(real_mount(mnt));
 }
+bool current_chrooted(void)
+{
+        /* Does the current process have a non-standard root */
+        struct path ns_root;
+        struct path fs_root;
+        bool chrooted;
+        /* Find the namespace root */
+        ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
+        ns_root.dentry = ns_root.mnt->mnt_root;
+        path_get(&ns_root);
+        while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
+                ;
+        get_fs_root(current->fs, &fs_root);
+        chrooted = !path_equal(&fs_root, &ns_root);
+        path_put(&fs_root);
+        path_put(&ns_root);
+        return chrooted;
+}
+void update_mnt_policy(struct user_namespace *userns)
+{
+        struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+        struct mount *mnt;
+        down_read(&namespace_sem);
+        list_for_each_entry(mnt, &ns->list, mnt_list) {
+                switch (mnt->mnt.mnt_sb->s_magic) {
+                case SYSFS_MAGIC:
+                        userns->may_mount_sysfs = true;
+                        break;
+                case PROC_SUPER_MAGIC:
+                        userns->may_mount_proc = true;
+                        break;
+                }
+                if (userns->may_mount_sysfs && userns->may_mount_proc)
+                        break;
+        }
+        up_read(&namespace_sem);
+}
 static void *mntns_get(struct task_struct *task)
 {
        struct mnt_namespace *ns = NULL;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 7dafd6899a62..26910c8154da 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -1051,6 +1051,7 @@ static struct file_system_type ncp_fs_type = {
        .kill_sb        = kill_anon_super,
        .fs_flags       = FS_BINARY_MOUNTDATA,
 };
+MODULE_ALIAS_FS("ncpfs");
 static int __init init_ncp_fs(void)
 {
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17b..6fc7b5cae92b 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
        bl_pipe_msg.bl_wq = &nn->bl_wq;
        memset(msg, 0, sizeof(*msg));
-        msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+        msg->len = sizeof(bl_msg) + bl_msg.totallen;
+        msg->data = kzalloc(msg->len, GFP_NOFS);
        if (!msg->data)
                goto out;
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
        memcpy(msg->data, &bl_msg, sizeof(bl_msg));
        dataptr = (uint8_t *) msg->data;
        memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
-        msg->len = sizeof(bl_msg) + bl_msg.totallen;
        add_wait_queue(&nn->bl_wq, &wq);
        if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dc0f98dfa717..c516da5873fd 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -726,9 +726,9 @@ out1:
        return ret;
 }
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
 {
-        return key_instantiate_and_link(key, data, strlen(data) + 1,
+        return key_instantiate_and_link(key, data, datalen,
                                        id_resolver_cache->thread_keyring,
                                        authkey);
 }
@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
                struct key *key, struct key *authkey)
 {
        char id_str[NFS_UINT_MAXLEN];
+        size_t len;
        int ret = -ENOKEY;
        /* ret = -ENOKEY */
@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
        case IDMAP_CONV_NAMETOID:
                if (strcmp(upcall->im_name, im->im_name) != 0)
                        break;
-                sprintf(id_str, "%d", im->im_id);
+                /* Note: here we store the NUL terminator too */
-                ret = nfs_idmap_instantiate(key, authkey, id_str);
+                len = sprintf(id_str, "%d", im->im_id) + 1;
+                ret = nfs_idmap_instantiate(key, authkey, id_str, len);
                break;
        case IDMAP_CONV_IDTONAME:
                if (upcall->im_id != im->im_id)
                        break;
-                ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+                len = strlen(im->im_name);
+                ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
                break;
        default:
                ret = -EINVAL;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb044c109..4fb234d3aefb 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
 {
        if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
                return;
-        clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
        pnfs_return_layout(inode);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb0f901..26431cf62ddb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        int status;
        if (pnfs_ld_layoutret_on_setattr(inode))
-                pnfs_return_layout(inode);
+                pnfs_commit_and_return_layout(inode);
        nfs_fattr_init(fattr);
        
@@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 static void nfs4_layoutcommit_release(void *calldata)
 {
        struct nfs4_layoutcommit_data *data = calldata;
-        struct pnfs_layout_segment *lseg, *tmp;
-        unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
        pnfs_cleanup_layoutcommit(data);
-        /* Matched by references in pnfs_set_layoutcommit */
-        list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
-                list_del_init(&lseg->pls_lc_list);
-                if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
-                                       &lseg->pls_flags))
-                        pnfs_put_lseg(lseg);
-        }
-        clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
-        smp_mb__after_clear_bit();
-        wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
        put_rpccred(data->cred);
        kfree(data);
 }
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aad6258..4bdffe0ba025 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
               lo_seg_intersecting(lseg_range, recall_range);
 }
+static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, /* drop one ref on lseg; on the last ref move it to tmp_list */
+                struct list_head *tmp_list) /* list that receives lseg when its refcount reaches zero */
+{
+        if (!atomic_dec_and_test(&lseg->pls_refcount)) /* decrement; true only when the count hit zero */
+                return false; /* other references remain: lseg stays where it is */
+        pnfs_layout_remove_lseg(lseg->pls_layout, lseg); /* last reference gone: hand lseg to the layout's remove helper */
+        list_add(&lseg->pls_list, tmp_list); /* re-link lseg onto the caller's list via pls_list */
+        return true; /* tells the caller lseg is now on tmp_list */
+}
 /* Returns 1 if lseg is removed from list, 0 otherwise */
 static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
                             struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
                 */
                dprintk("%s: lseg %p ref %d\n", __func__, lseg,
                        atomic_read(&lseg->pls_refcount));
-                if (atomic_dec_and_test(&lseg->pls_refcount)) {
+                if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
-                        pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
-                        list_add(&lseg->pls_list, tmp_list);
                        rv = 1;
-                }
        }
        return rv;
 }
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        return lseg;
 }
+static void pnfs_clear_layoutcommit(struct inode *inode, /* cancel a pending layoutcommit: clear the inode flag and every per-segment flag */
+                struct list_head *head) /* receives segments whose refcount drops to zero here */
+{
+        struct nfs_inode *nfsi = NFS_I(inode);
+        struct pnfs_layout_segment *lseg, *tmp;
+        if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) /* nothing pending on this inode */
+                return;
+        list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { /* _safe: the helper below may re-link lseg->pls_list */
+                if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) /* skip segments that were not flagged */
+                        continue;
+                pnfs_lseg_dec_and_remove_zero(lseg, head); /* drop one reference for the flag just cleared; return value unused */
+        } /* NOTE(review): the visible caller appears to hold inode->i_lock across this walk - confirm */
+}
 /*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
        /* Reference matched in nfs4_layoutreturn_release */
        pnfs_get_layout_hdr(lo);
        empty = list_empty(&lo->plh_segs);
+        pnfs_clear_layoutcommit(ino, &tmp_list);
        pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
        /* Don't send a LAYOUTRETURN if list was initially empty */
        if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
-        WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
        lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
        if (unlikely(lrp == NULL)) {
                status = -ENOMEM;
@@ -845,6 +866,33 @@ out:
 }
 EXPORT_SYMBOL_GPL(_pnfs_return_layout);
+int
+pnfs_commit_and_return_layout(struct inode *inode) /* layoutcommit the inode, then return its layout; 0 if there is no layout */
+{
+        struct pnfs_layout_hdr *lo;
+        int ret;
+        spin_lock(&inode->i_lock);
+        lo = NFS_I(inode)->layout; /* sampled under i_lock */
+        if (lo == NULL) { /* no layout attached: nothing to commit or return */
+                spin_unlock(&inode->i_lock);
+                return 0;
+        }
+        pnfs_get_layout_hdr(lo); /* paired with pnfs_put_layout_hdr() at the end */
+        /* Block new layoutgets and read/write to ds */
+        lo->plh_block_lgets++; /* undone by the decrement below */
+        spin_unlock(&inode->i_lock);
+        filemap_fdatawait(inode->i_mapping); /* wait on the mapping before committing; its return value is ignored */
+        ret = pnfs_layoutcommit_inode(inode, true); /* second argument is the 'sync' flag of the prototype */
+        if (ret == 0) /* only return the layout when the commit succeeded */
+                ret = _pnfs_return_layout(inode);
+        spin_lock(&inode->i_lock);
+        lo->plh_block_lgets--; /* runs on both success and failure paths */
+        spin_unlock(&inode->i_lock);
+        pnfs_put_layout_hdr(lo);
+        return ret; /* layoutcommit error, else the _pnfs_return_layout() result */
+}
 bool pnfs_roc(struct inode *ino)
 {
        struct pnfs_layout_hdr *lo;
@@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
        dprintk("pnfs write error = %d\n", hdr->pnfs_error);
        if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
            PNFS_LAYOUTRET_ON_ERROR) {
-                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
                pnfs_return_layout(hdr->inode);
        }
        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
        dprintk("pnfs read error = %d\n", hdr->pnfs_error);
        if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
            PNFS_LAYOUTRET_ON_ERROR) {
-                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
                pnfs_return_layout(hdr->inode);
        }
        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
        list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
                if (lseg->pls_range.iomode == IOMODE_RW &&
-                    test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+                    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
                        list_add(&lseg->pls_lc_list, listp);
        }
 }
+static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) /* release every segment on listp, then clear NFS_INO_LAYOUTCOMMITTING and wake waiters */
+{
+        struct pnfs_layout_segment *lseg, *tmp;
+        unsigned long *bitlock = &NFS_I(inode)->flags; /* flag word holding NFS_INO_LAYOUTCOMMITTING */
+        /* Matched by references in pnfs_set_layoutcommit */
+        list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { /* _safe: entries are unlinked while walking */
+                list_del_init(&lseg->pls_lc_list); /* take lseg off the layoutcommit list */
+                pnfs_put_lseg(lseg); /* unconditional put, one per listed segment */
+        }
+        clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); /* clear the bit with unlock semantics */
+        smp_mb__after_clear_bit(); /* barrier between the clear and the wake-up below */
+        wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); /* wake anyone waiting on that bit */
+}
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 {
        pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
        if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
                nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+        pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
 }
 /*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba80417748..f5f8a470a647 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
+int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_write_data *);
 void pnfs_ld_read_done(struct nfs_read_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
        return 0;
 }
+static inline int pnfs_commit_and_return_layout(struct inode *inode) /* stub: ignores inode and always reports success */
+{
+        return 0; /* NOTE(review): presumably the variant for builds without pNFS - confirm against the enclosing #ifdef */
+}
 static inline bool
 pnfs_ld_layoutret_on_setattr(struct inode *inode)
 {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 17b32b722457..2f8a29db0f1b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -294,6 +294,7 @@ struct file_system_type nfs_fs_type = {
        .kill_sb        = nfs_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
+MODULE_ALIAS_FS("nfs");
 EXPORT_SYMBOL_GPL(nfs_fs_type);
 struct file_system_type nfs_xdev_fs_type = {
@@ -333,6 +334,8 @@ struct file_system_type nfs4_fs_type = {
        .kill_sb        = nfs_kill_super,
        .fs_flags       = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
 };
+MODULE_ALIAS_FS("nfs4");
+MODULE_ALIAS("nfs4");
 EXPORT_SYMBOL_GPL(nfs4_fs_type);
 static int __init register_nfs4_fs(void)
@@ -2717,6 +2720,5 @@ module_param(send_implementation_id, ushort, 0644);
 MODULE_PARM_DESC(send_implementation_id,
                "Send implementation ID with NFSv4.1 exchange_id");
 MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string");
-MODULE_ALIAS("nfs4");
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 16d39c6c4fbb..2e27430b9070 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,37 +230,6 @@ static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
                __nfs4_file_put_access(fp, oflag);
 }
-static inline int get_new_stid(struct nfs4_stid *stid)
-{
-        static int min_stateid = 0;
-        struct idr *stateids = &stid->sc_client->cl_stateids;
-        int new_stid;
-        int error;
-        error = idr_get_new_above(stateids, stid, min_stateid, &new_stid);
-        /*
-         * Note: the necessary preallocation was done in
-         * nfs4_alloc_stateid().  The idr code caps the number of
-         * preallocations that can exist at a time, but the state lock
-         * prevents anyone from using ours before we get here:
-         */
-        WARN_ON_ONCE(error);
-        /*
-         * It shouldn't be a problem to reuse an opaque stateid value.
-         * I don't think it is for 4.1.  But with 4.0 I worry that, for
-         * example, a stray write retransmission could be accepted by
-         * the server when it should have been rejected.  Therefore,
-         * adopt a trick from the sctp code to attempt to maximize the
-         * amount of time until an id is reused, by ensuring they always
-         * "increase" (mod INT_MAX):
-         */
-        min_stateid = new_stid+1;
-        if (min_stateid == INT_MAX)
-                min_stateid = 0;
-        return new_stid;
-}
 static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
 kmem_cache *slab)
 {
@@ -273,9 +242,8 @@ kmem_cache *slab)
        if (!stid)
                return NULL;
-        if (!idr_pre_get(stateids, GFP_KERNEL))
+        new_id = idr_alloc(stateids, stid, min_stateid, 0, GFP_KERNEL);
-                goto out_free;
+        if (new_id < 0)
-        if (idr_get_new_above(stateids, stid, min_stateid, &new_id))
                goto out_free;
        stid->sc_client = cl;
        stid->sc_type = 0;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 62c1ee128aeb..ca05f6dc3544 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 {
        if (rp->c_type == RC_REPLBUFF)
                kfree(rp->c_replvec.iov_base);
-        hlist_del(&rp->c_hash);
+        if (!hlist_unhashed(&rp->c_hash))
+                hlist_del(&rp->c_hash);
        list_del(&rp->c_lru);
        --num_drc_entries;
        kmem_cache_free(drc_slab, rp);
@@ -118,6 +119,10 @@ nfsd_reply_cache_free(struct svc_cacherep *rp)
 int nfsd_reply_cache_init(void)
 {
+        INIT_LIST_HEAD(&lru_head);
+        max_drc_entries = nfsd_cache_size_limit();
+        num_drc_entries = 0;
        register_shrinker(&nfsd_reply_cache_shrinker);
        drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
                                        0, 0, NULL);
@@ -128,10 +133,6 @@ int nfsd_reply_cache_init(void)
        if (!cache_hash)
                goto out_nomem;
-        INIT_LIST_HEAD(&lru_head);
-        max_drc_entries = nfsd_cache_size_limit();
-        num_drc_entries = 0;
        return 0;
 out_nomem:
        printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 13a21c8fca49..f33455b4d957 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1090,6 +1090,7 @@ static struct file_system_type nfsd_fs_type = {
        .mount          = nfsd_mount,
        .kill_sb        = nfsd_umount,
 };
+MODULE_ALIAS_FS("nfsd");
 #ifdef CONFIG_PROC_FS
 static int create_proc_exports_entry(void)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2a7eb536de0b..2b2e2396a869 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        int                     host_err;
        int                     stable = *stablep;
        int                     use_wgather;
+        loff_t                  pos = offset;
        dentry = file->f_path.dentry;
        inode = dentry->d_inode;
@@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        /* Write the data. */
        oldfs = get_fs(); set_fs(KERNEL_DS);
-        host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
+        host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos);
        set_fs(oldfs);
        if (host_err < 0)
                goto out_nfserr;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 3c991dc84f2f..c7d1f9f18b09 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1361,6 +1361,7 @@ struct file_system_type nilfs_fs_type = {
        .kill_sb  = kill_block_super,
        .fs_flags = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("nilfs2");
 static void nilfs_inode_init_once(void *obj)
 {
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 4a8289f8b16c..82650d52d916 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3079,6 +3079,7 @@ static struct file_system_type ntfs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ntfs");
 /* Stable names for the slab caches. */
 static const char ntfs_index_ctx_cache_name[] = "ntfs_index_ctx_cache";
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 4c5fc8d77dc2..12bafb7265ce 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -640,6 +640,7 @@ static struct file_system_type dlmfs_fs_type = {
        .mount          = dlmfs_mount,
        .kill_sb        = kill_litter_super,
 };
+MODULE_ALIAS_FS("ocfs2_dlmfs");
 static int __init init_dlmfs_fs(void)
 {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 9b6910dec4ba..01b85165552b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1266,6 +1266,7 @@ static struct file_system_type ocfs2_fs_type = {
        .fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
        .next           = NULL
 };
+MODULE_ALIAS_FS("ocfs2");
 static int ocfs2_check_set_options(struct super_block *sb,
                                   struct mount_options *options)
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 25d715c7c87a..d8b0afde2179 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -572,6 +572,7 @@ static struct file_system_type omfs_fs_type = {
        .kill_sb = kill_block_super,
        .fs_flags = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("omfs");
 static int __init init_omfs_fs(void)
 {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ae47fa7efb9d..75885ffde44e 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -432,6 +432,7 @@ static struct file_system_type openprom_fs_type = {
        .mount          = openprom_mount,
        .kill_sb        = kill_anon_super,
 };
+MODULE_ALIAS_FS("openpromfs");
 static void op_inode_init_once(void *data)
 {
diff --git a/fs/pipe.c b/fs/pipe.c
index 64a494cef0a0..2234f3f61f8d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -863,6 +863,9 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
 {
        int ret = -ENOENT;
+        if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
+                return -EINVAL;
        mutex_lock(&inode->i_mutex);
        if (inode->i_pipe) {
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..8b29d2164da6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -9,6 +9,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
+#include <linux/nsproxy.h>
 #include "internal.h"
 #include "pnode.h"
@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
 int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
                    struct mount *source_mnt, struct list_head *tree_list)
 {
+        struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
        struct mount *m, *child;
        int ret = 0;
        struct mount *prev_dest_mnt = dest_mnt;
@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
                source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+                /* Notice when we are propagating across user namespaces */
+                if (m->mnt_ns->user_ns != user_ns)
+                        type |= CL_UNPRIVILEGED;
                child = copy_tree(source, source->mnt.mnt_root, type);
                if (IS_ERR(child)) {
                        ret = PTR_ERR(child);
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..a0493d5ebfbf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -23,6 +23,7 @@
 #define CL_MAKE_SHARED          0x08
 #define CL_PRIVATE              0x10
 #define CL_SHARED_TO_SLAVE      0x20
+#define CL_UNPRIVILEGED         0x40
 static inline void set_mnt_shared(struct mount *mnt)
 {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a86aebc9ba7c..869116c2afbe 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -446,9 +446,10 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
 struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 {
-        struct inode *inode = iget_locked(sb, de->low_ino);
+        struct inode *inode = new_inode_pseudo(sb);
-        if (inode && (inode->i_state & I_NEW)) {
+        if (inode) {
+                inode->i_ino = de->low_ino;
                inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
                PROC_I(inode)->pde = de;
@@ -476,7 +477,6 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
                                inode->i_fop = de->proc_fops;
                        }
                }
-                unlock_new_inode(inode);
        } else
               pde_put(de);
        return inode;
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index b7a47196c8c3..66b51c0383da 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -118,7 +118,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
        struct super_block *sb = inode->i_sb;
        struct proc_inode *ei = PROC_I(inode);
        struct task_struct *task;
-        struct dentry *ns_dentry;
+        struct path ns_path;
        void *error = ERR_PTR(-EACCES);
        task = get_proc_task(inode);
@@ -128,14 +128,14 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
        if (!ptrace_may_access(task, PTRACE_MODE_READ))
                goto out_put_task;
-        ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
+        ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
-        if (IS_ERR(ns_dentry)) {
+        if (IS_ERR(ns_path.dentry)) {
-                error = ERR_CAST(ns_dentry);
+                error = ERR_CAST(ns_path.dentry);
                goto out_put_task;
        }
-        dput(nd->path.dentry);
+        ns_path.mnt = mntget(nd->path.mnt);
-        nd->path.dentry = ns_dentry;
+        nd_jump_link(nd, &ns_path);
        error = NULL;
 out_put_task:
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26bac..9c7fab1d23f0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
+#include <linux/user_namespace.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
 #include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
        } else {
                ns = task_active_pid_ns(current);
                options = data;
+                if (!current_user_ns()->may_mount_proc)
+                        return ERR_PTR(-EPERM);
        }
        sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 43098bb5723a..2e8caa62da78 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -412,6 +412,7 @@ static struct file_system_type qnx4_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("qnx4");
 static int __init init_qnx4_fs(void)
 {
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 57199a52a351..8d941edfefa1 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -672,6 +672,7 @@ static struct file_system_type qnx6_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("qnx6");
 static int __init init_qnx6_fs(void)
 {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 05ae3c97f7a5..3e64169ef527 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1439,8 +1439,11 @@ static void __dquot_initialize(struct inode *inode, int type)
                         * did a write before quota was turned on
                         */
                        rsv = inode_get_rsv_space(inode);
-                        if (unlikely(rsv))
+                        if (unlikely(rsv)) {
+                                spin_lock(&dq_data_lock);
                                dquot_resv_space(inode->i_dquot[cnt], rsv);
+                                spin_unlock(&dq_data_lock);
+                        }
                }
        }
 out_err:
diff --git a/fs/read_write.c b/fs/read_write.c
index a698eff457fb..e6ddc8dceb96 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
 #include <linux/splice.h>
 #include <linux/compat.h>
 #include "read_write.h"
+#include "internal.h"
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 EXPORT_SYMBOL(do_sync_write);
+ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) /* write from a kernel buffer by calling the file's write method directly */
+{
+        mm_segment_t old_fs;
+        const char __user *p;
+        ssize_t ret;
+        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) /* file has no way to write */
+                return -EINVAL;
+        old_fs = get_fs(); /* saved so it can be restored after the write */
+        set_fs(get_ds()); /* switch the address limit for the duration of the call */
+        p = (__force const char __user *)buf; /* present the kernel pointer with the __user type the methods expect */
+        if (count > MAX_RW_COUNT) /* clamp oversized requests */
+                count =  MAX_RW_COUNT;
+        if (file->f_op->write) /* prefer ->write when present */
+                ret = file->f_op->write(file, p, count, pos);
+        else /* only ->aio_write exists: go through the sync wrapper */
+                ret = do_sync_write(file, p, count, pos);
+        set_fs(old_fs); /* restore the caller's address limit */
+        if (ret > 0) { /* some bytes were written */
+                fsnotify_modify(file);
+                add_wchar(current, ret); /* account the bytes to the current task */
+        }
+        inc_syscw(current); /* counted whether or not the write succeeded */
+        return ret; /* bytes written, or the negative error from the write method */
+}
 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 {
        ssize_t ret;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 418bdc3a57da..f8a23c3078f8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1147,8 +1147,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                                                         "on filesystem root.");
                                        return 0;
                                }
-                                qf_names[qtype] =
+                                qf_names[qtype] = kstrdup(arg, GFP_KERNEL);
-                                    kmalloc(strlen(arg) + 1, GFP_KERNEL);
                                if (!qf_names[qtype]) {
                                        reiserfs_warning(s, "reiserfs-2502",
                                                         "not enough memory "
@@ -1156,7 +1155,6 @@ static int reiserfs_parse_options(struct super_block *s, char *options,	/* strin
                                                         "quotafile name.");
                                        return 0;
                                }
-                                strcpy(qf_names[qtype], arg);
                                if (qtype == USRQUOTA)
                                        *mount_options |= 1 << REISERFS_USRQUOTA;
                                else
@@ -2434,6 +2432,7 @@ struct file_system_type reiserfs_fs_type = {
        .kill_sb = reiserfs_kill_sb,
        .fs_flags = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("reiserfs");
 MODULE_DESCRIPTION("ReiserFS journaled filesystem");
 MODULE_AUTHOR("Hans Reiser <reiser@namesys.com>");
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 7e8d3a80bdab..15cbc41ee365 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -599,6 +599,7 @@ static struct file_system_type romfs_fs_type = {
        .kill_sb        = romfs_kill_sb,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("romfs");
 /*
 * inode storage initialiser
diff --git a/fs/splice.c b/fs/splice.c
index 718bd0056384..29e394e49ddd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/gfp.h>
 #include <linux/socket.h>
+#include "internal.h"
 /*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 {
        int ret;
        void *data;
+        loff_t tmp = sd->pos;
        data = buf->ops->map(pipe, buf, 0);
-        ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+        ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
        buf->ops->unmap(pipe, buf, data);
        return ret;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 260e3928d4f5..60553a9053ca 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -489,6 +489,7 @@ static struct file_system_type squashfs_fs_type = {
        .kill_sb = kill_block_super,
        .fs_flags = FS_REQUIRES_DEV
 };
+MODULE_ALIAS_FS("squashfs");
 static const struct super_operations squashfs_super_ops = {
        .alloc_inode = squashfs_alloc_inode,
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2fbdff6be25c..e14512678c9b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
                ino = parent_sd->s_ino;
                if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
                        filp->f_pos++;
+                else
+                        return 0;
        }
        if (filp->f_pos == 1) {
                if (parent_sd->s_parent)
@@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        ino = parent_sd->s_ino;
                if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
                        filp->f_pos++;
+                else
+                        return 0;
        }
        mutex_lock(&sysfs_mutex);
        for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
@@ -1058,10 +1062,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
        return 0;
 }
+static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) /* generic_file_llseek() performed while holding the inode's i_mutex */
+{
+        struct inode *inode = file_inode(file);
+        loff_t ret;
+        mutex_lock(&inode->i_mutex); /* NOTE(review): presumably serializes f_pos updates against readdir - confirm */
+        ret = generic_file_llseek(file, offset, whence); /* arguments passed through unchanged */
+        mutex_unlock(&inode->i_mutex);
+        return ret; /* whatever generic_file_llseek() returned */
+}
 const struct file_operations sysfs_dir_operations = {
        .read           = generic_read_dir,
        .readdir        = sysfs_readdir,
        .release        = sysfs_dir_release,
-        .llseek         = generic_file_llseek,
+        .llseek         = sysfs_dir_llseek,
 };
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5ec733..afd83273e6ce 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/user_namespace.h>
 #include "sysfs.h"
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
        struct super_block *sb;
        int error;
+        if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+                return ERR_PTR(-EPERM);
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info)
                return ERR_PTR(-ENOMEM);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a38e87bdd78d..d0c6a007ce83 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -545,6 +545,7 @@ static struct file_system_type sysv_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("sysv");
 static struct file_system_type v7_fs_type = {
        .owner          = THIS_MODULE,
@@ -553,6 +554,8 @@ static struct file_system_type v7_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("v7");
+MODULE_ALIAS("v7");
 static int __init init_sysv_fs(void)
 {
@@ -586,5 +589,4 @@ static void __exit exit_sysv_fs(void)
 module_init(init_sysv_fs)
 module_exit(exit_sysv_fs)
-MODULE_ALIAS("v7");
 MODULE_LICENSE("GPL");
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ddc0f6ae65e9..ac838b844936 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2174,6 +2174,7 @@ static struct file_system_type ubifs_fs_type = {
        .mount   = ubifs_mount,
        .kill_sb = kill_ubifs_super,
 };
+MODULE_ALIAS_FS("ubifs");
 /*
 * Inode slab cache constructor.
diff --git a/fs/udf/super.c b/fs/udf/super.c
index bc5b30a819e8..9ac4057a86c9 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -118,6 +118,7 @@ static struct file_system_type udf_fstype = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("udf");
 static struct kmem_cache *udf_inode_cachep;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index dc8e3a861d0f..329f2f53b7ed 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1500,6 +1500,7 @@ static struct file_system_type ufs_fs_type = {
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("ufs");
 static int __init init_ufs_fs(void)
 {
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4e8f0df82d02..8459b5d8cb71 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1334,6 +1334,12 @@ _xfs_buf_ioapply(
        int             size;
        int             i;
+        /*
+         * Make sure we capture only current IO errors rather than stale errors
+         * left over from previous use of the buffer (e.g. failed readahead).
+         */
+        bp->b_error = 0;
        if (bp->b_flags & XBF_WRITE) {
                if (bp->b_flags & XBF_SYNCIO)
                        rw = WRITE_SYNC;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 912d83d8860a..5a30dd899d2b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -325,7 +325,7 @@ xfs_iomap_eof_want_preallocate(
 * rather than falling short due to things like stripe unit/width alignment of
 * real extents.
 */
-STATIC int
+STATIC xfs_fsblock_t
 xfs_iomap_eof_prealloc_initial_size(
        struct xfs_mount        *mp,
        struct xfs_inode        *ip,
@@ -413,7 +413,7 @@ xfs_iomap_prealloc_size(
                 * have a large file on a small filesystem and the above
                 * lowspace thresholds are smaller than MAXEXTLEN.
                 */
-                while (alloc_blocks >= freesp)
+                while (alloc_blocks && alloc_blocks >= freesp)
                        alloc_blocks >>= 4;
        }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index c407121873b4..ea341cea68cb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1561,6 +1561,7 @@ static struct file_system_type xfs_fs_type = {
        .kill_sb                = kill_block_super,
        .fs_flags               = FS_REQUIRES_DEV,
 };
+MODULE_ALIAS_FS("xfs");
 STATIC int __init
 xfs_init_zones(void)