90 files changed, 1482 insertions, 921 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3f75895c919b..a383c18e74e8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -179,60 +179,74 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                                struct ulist *parents, int level,
-                                struct btrfs_key *key, u64 wanted_disk_byte,
+                                struct btrfs_key *key_for_search, u64 time_seq,
+                                u64 wanted_disk_byte,
                                const u64 *extent_item_pos)
 {
-        int ret;
+        int ret = 0;
-        int slot = path->slots[level];
+        int slot;
-        struct extent_buffer *eb = path->nodes[level];
+        struct extent_buffer *eb;
+        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        struct extent_inode_elem *eie = NULL;
        u64 disk_byte;
-        u64 wanted_objectid = key->objectid;
-add_parent:
+        if (level != 0) {
-        if (level == 0 && extent_item_pos) {
+                eb = path->nodes[level];
-                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+                ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
-                ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie);
                if (ret < 0)
                        return ret;
-        }
-        ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS);
-        if (ret < 0)
-                return ret;
-        if (level != 0)
                return 0;
+        }
        /*
-         * if the current leaf is full with EXTENT_DATA items, we must
+         * We normally enter this function with the path already pointing to
-         * check the next one if that holds a reference as well.
+         * the first item to check. But sometimes, we may enter it with
-         * ref->count cannot be used to skip this check.
+         * slot==nritems. In that case, go to the next leaf before we continue.
-         * repeat this until we don't find any additional EXTENT_DATA items.
         */
-        while (1) {
+        if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
-                eie = NULL;
+                ret = btrfs_next_old_leaf(root, path, time_seq);
-                ret = btrfs_next_leaf(root, path);
-                if (ret < 0)
-                        return ret;
-                if (ret)
-                        return 0;
+        while (!ret) {
                eb = path->nodes[0];
-                for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
+                slot = path->slots[0];
-                        btrfs_item_key_to_cpu(eb, key, slot);
-                        if (key->objectid != wanted_objectid ||
+                btrfs_item_key_to_cpu(eb, &key, slot);
-                            key->type != BTRFS_EXTENT_DATA_KEY)
-                                return 0;
+                if (key.objectid != key_for_search->objectid ||
-                        fi = btrfs_item_ptr(eb, slot,
+                    key.type != BTRFS_EXTENT_DATA_KEY)
-                                                struct btrfs_file_extent_item);
+                        break;
-                        disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
-                        if (disk_byte == wanted_disk_byte)
+                fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
-                                goto add_parent;
+                disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
+                if (disk_byte == wanted_disk_byte) {
+                        eie = NULL;
+                        if (extent_item_pos) {
+                                ret = check_extent_in_eb(&key, eb, fi,
+                                                *extent_item_pos,
+                                                &eie);
+                                if (ret < 0)
+                                        break;
+                        }
+                        if (!ret) {
+                                ret = ulist_add(parents, eb->start,
+                                                (unsigned long)eie, GFP_NOFS);
+                                if (ret < 0)
+                                        break;
+                                if (!extent_item_pos) {
+                                        ret = btrfs_next_old_leaf(root, path,
+                                                        time_seq);
+                                        continue;
+                                }
+                        }
                }
+                ret = btrfs_next_old_item(root, path, time_seq);
        }
-        return 0;
+        if (ret > 0)
+                ret = 0;
+        return ret;
 }
 /*
@@ -249,7 +263,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_path *path;
        struct btrfs_root *root;
        struct btrfs_key root_key;
-        struct btrfs_key key = {0};
        struct extent_buffer *eb;
        int ret = 0;
        int root_level;
@@ -288,25 +301,19 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                goto out;
        eb = path->nodes[level];
-        if (!eb) {
+        while (!eb) {
-                WARN_ON(1);
+                if (!level) {
-                ret = 1;
+                        WARN_ON(1);
-                goto out;
+                        ret = 1;
-        }
+                        goto out;
-        if (level == 0) {
-                if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) {
-                        ret = btrfs_next_leaf(root, path);
-                        if (ret)
-                                goto out;
-                        eb = path->nodes[0];
                }
+                level--;
-                btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
+                eb = path->nodes[level];
        }
-        ret = add_all_parents(root, path, parents, level, &key,
+        ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
-                                ref->wanted_disk_byte, extent_item_pos);
+                                time_seq, ref->wanted_disk_byte,
+                                extent_item_pos);
 out:
        btrfs_free_path(path);
        return ret;
@@ -832,6 +839,7 @@ again:
                        }
                        ret = __add_delayed_refs(head, delayed_ref_seq,
                                                 &prefs_delayed);
+                        mutex_unlock(&head->mutex);
                        if (ret) {
                                spin_unlock(&delayed_refs->lock);
                                goto out;
@@ -925,8 +933,6 @@ again:
        }
 out:
-        if (head)
-                mutex_unlock(&head->mutex);
        btrfs_free_path(path);
        while (!list_empty(&prefs)) {
                ref = list_first_entry(&prefs, struct __prelim_ref, list);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e616f8872e69..12394a90d60f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -37,6 +37,7 @@
 #define BTRFS_INODE_IN_DEFRAG                   3
 #define BTRFS_INODE_DELALLOC_META_RESERVED      4
 #define BTRFS_INODE_HAS_ORPHAN_ITEM             5
+#define BTRFS_INODE_HAS_ASYNC_EXTENT            6
 /* in memory btrfs inode */
 struct btrfs_inode {
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 9cebb1fd6a3c..da6e9364a5e3 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -93,6 +93,7 @@
 #include "print-tree.h"
 #include "locking.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(
                superblock_tmp->never_written = 0;
                superblock_tmp->mirror_num = 1 + superblock_mirror_num;
                if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-                        printk(KERN_INFO "New initial S-block (bdev %p, %s)"
+                        printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
-                               " @%llu (%s/%llu/%d)\n",
+                                     " @%llu (%s/%llu/%d)\n",
-                               superblock_bdev, device->name,
+                                     superblock_bdev,
-                               (unsigned long long)dev_bytenr,
+                                     rcu_str_deref(device->name),
-                               dev_state->name,
+                                     (unsigned long long)dev_bytenr,
-                               (unsigned long long)dev_bytenr,
+                                     dev_state->name,
-                               superblock_mirror_num);
+                                     (unsigned long long)dev_bytenr,
+                                     superblock_mirror_num);
                list_add(&superblock_tmp->all_blocks_node,
                         &state->all_blocks_list);
                btrfsic_block_hashtable_add(superblock_tmp,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d7a96cfdc50a..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
        return 0;
 }
+/*
+ * This allocates memory and gets a tree modification sequence number when
+ * needed.
+ *
+ * Returns 0 when no sequence number is needed, < 0 on error.
+ * Returns 1 when a sequence number was added. In this case,
+ * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
+ * after inserting into the rb tree.
+ */
 static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
                                 struct tree_mod_elem **tm_ret)
 {
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
                 */
                kfree(tm);
                seq = 0;
+                spin_unlock(&fs_info->tree_mod_seq_lock);
        } else {
                __get_tree_mod_seq(fs_info, &tm->elem);
                seq = tm->elem.seq;
        }
-        spin_unlock(&fs_info->tree_mod_seq_lock);
        return seq;
 }
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
        tm->slot = slot;
        tm->generation = btrfs_node_ptr_generation(eb, slot);
-        return __tree_mod_log_insert(fs_info, tm);
+        ret = __tree_mod_log_insert(fs_info, tm);
+        spin_unlock(&fs_info->tree_mod_seq_lock);
+        return ret;
 }
 static noinline int
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        tm->move.nr_items = nr_items;
        tm->op = MOD_LOG_MOVE_KEYS;
-        return __tree_mod_log_insert(fs_info, tm);
+        ret = __tree_mod_log_insert(fs_info, tm);
+        spin_unlock(&fs_info->tree_mod_seq_lock);
+        return ret;
 }
 static noinline int
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        tm->generation = btrfs_header_generation(old_root);
        tm->op = MOD_LOG_ROOT_REPLACE;
-        return __tree_mod_log_insert(fs_info, tm);
+        ret = __tree_mod_log_insert(fs_info, tm);
+        spin_unlock(&fs_info->tree_mod_seq_lock);
+        return ret;
 }
 static struct tree_mod_elem *
@@ -1009,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
                if (!looped && !tm)
                        return 0;
                /*
-                 * we must have key remove operations in the log before the
+                 * if there is no tree operation for the oldest root, we simply
-                 * replace operation.
+                 * return it. this should only happen if that (old) root is at
+                 * level 0.
                 */
-                BUG_ON(!tm);
+                if (!tm)
+                        break;
+                /*
+                 * if there's an operation that's not a root replacement, we
+                 * found the oldest version of our root. normally, we'll find a
+                 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+                 */
                if (tm->op != MOD_LOG_ROOT_REPLACE)
                        break;
@@ -1023,6 +1045,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
                looped = 1;
        }
+        /* if there's no old root to return, return what we found instead */
+        if (!found)
+                found = tm;
        return found;
 }
@@ -1068,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
                                                      tm->generation);
                        break;
                case MOD_LOG_KEY_ADD:
-                        if (tm->slot != n - 1) {
+                        /* if a move operation is needed it's in the log */
-                                o_dst = btrfs_node_key_ptr_offset(tm->slot);
-                                o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
-                                memmove_extent_buffer(eb, o_dst, o_src, p_size);
-                        }
                        n--;
                        break;
                case MOD_LOG_MOVE_KEYS:
@@ -1143,45 +1165,57 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
        return eb_rewin;
 }
+/*
+ * get_old_root() rewinds the state of @root's root node to the given @time_seq
+ * value. If there are no changes, the current root->root_node is returned. If
+ * anything changed in between, there's a fresh buffer allocated on which the
+ * rewind operations are done. In any case, the returned buffer is read locked.
+ * Returns NULL on error (with no locks held).
+ */
 static inline struct extent_buffer *
 get_old_root(struct btrfs_root *root, u64 time_seq)
 {
        struct tree_mod_elem *tm;
        struct extent_buffer *eb;
-        struct tree_mod_root *old_root;
+        struct tree_mod_root *old_root = NULL;
-        u64 old_generation;
+        u64 old_generation = 0;
+        u64 logical;
+        eb = btrfs_read_lock_root_node(root);
        tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
        if (!tm)
                return root->node;
-        old_root = &tm->old_root;
+        if (tm->op == MOD_LOG_ROOT_REPLACE) {
-        old_generation = tm->generation;
+                old_root = &tm->old_root;
+                old_generation = tm->generation;
-        tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq);
+                logical = old_root->logical;
-        /*
+        } else {
-         * there was an item in the log when __tree_mod_log_oldest_root
+                logical = root->node->start;
-         * returned. this one must not go away, because the time_seq passed to
+        }
-         * us must be blocking its removal.
-         */
-        BUG_ON(!tm);
-        if (old_root->logical == root->node->start) {
+        tm = tree_mod_log_search(root->fs_info, logical, time_seq);
-                /* there are logged operations for the current root */
+        if (old_root)
+                eb = alloc_dummy_extent_buffer(logical, root->nodesize);
+        else
                eb = btrfs_clone_extent_buffer(root->node);
-        } else {
+        btrfs_tree_read_unlock(root->node);
-                /* there's a root replace operation for the current root */
+        free_extent_buffer(root->node);
-                eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
+        if (!eb)
-                                               root->nodesize);
+                return NULL;
+        btrfs_tree_read_lock(eb);
+        if (old_root) {
                btrfs_set_header_bytenr(eb, eb->start);
                btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
                btrfs_set_header_owner(eb, root->root_key.objectid);
+                btrfs_set_header_level(eb, old_root->level);
+                btrfs_set_header_generation(eb, old_generation);
        }
-        if (!eb)
+        if (tm)
-                return NULL;
+                __tree_mod_log_rewind(eb, time_seq, tm);
-        btrfs_set_header_level(eb, old_root->level);
+        else
-        btrfs_set_header_generation(eb, old_generation);
+                WARN_ON(btrfs_header_level(eb) != 0);
-        __tree_mod_log_rewind(eb, time_seq, tm);
+        extent_buffer_get(eb);
        return eb;
 }
@@ -1650,8 +1684,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
-        btrfs_header_nritems(mid);
        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
                btrfs_tree_lock(left);
@@ -1681,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                wret = push_node_left(trans, root, left, mid, 1);
                if (wret < 0)
                        ret = wret;
-                btrfs_header_nritems(mid);
        }
        /*
@@ -2615,9 +2646,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
 again:
        b = get_old_root(root, time_seq);
-        extent_buffer_get(b);
        level = btrfs_header_level(b);
-        btrfs_tree_read_lock(b);
        p->locks[level] = BTRFS_READ_LOCK;
        while (b) {
@@ -2964,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct btrfs_path *path,
                       struct btrfs_disk_key *key, u64 bytenr,
-                       int slot, int level, int tree_mod_log)
+                       int slot, int level)
 {
        struct extent_buffer *lower;
        int nritems;
@@ -2977,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
        BUG_ON(slot > nritems);
        BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
        if (slot != nritems) {
-                if (tree_mod_log && level)
+                if (level)
                        tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
                                             slot, nritems - slot);
                memmove_extent_buffer(lower,
@@ -2985,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
                              btrfs_node_key_ptr_offset(slot),
                              (nritems - slot) * sizeof(struct btrfs_key_ptr));
        }
-        if (tree_mod_log && level) {
+        if (level) {
                ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
                                              MOD_LOG_KEY_ADD);
                BUG_ON(ret < 0);
@@ -3073,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(split);
        insert_ptr(trans, root, path, &disk_key, split->start,
-                   path->slots[level + 1] + 1, level + 1, 1);
+                   path->slots[level + 1] + 1, level + 1);
        if (path->slots[level] >= mid) {
                path->slots[level] -= mid;
@@ -3610,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
        btrfs_set_header_nritems(l, mid);
        btrfs_item_key(right, &disk_key, 0);
        insert_ptr(trans, root, path, &disk_key, right->start,
-                   path->slots[1] + 1, 1, 0);
+                   path->slots[1] + 1, 1);
        btrfs_mark_buffer_dirty(right);
        btrfs_mark_buffer_dirty(l);
@@ -3817,7 +3846,7 @@ again:
                if (mid <= slot) {
                        btrfs_set_header_nritems(right, 0);
                        insert_ptr(trans, root, path, &disk_key, right->start,
-                                   path->slots[1] + 1, 1, 0);
+                                   path->slots[1] + 1, 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
                        path->nodes[0] = right;
@@ -3826,7 +3855,7 @@ again:
                } else {
                        btrfs_set_header_nritems(right, 0);
                        insert_ptr(trans, root, path, &disk_key, right->start,
-                                          path->slots[1], 1, 0);
+                                          path->slots[1], 1);
                        btrfs_tree_unlock(path->nodes[0]);
                        free_extent_buffer(path->nodes[0]);
                        path->nodes[0] = right;
@@ -5001,6 +5030,12 @@ next:
 */
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
+        return btrfs_next_old_leaf(root, path, 0);
+}
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+                        u64 time_seq)
+{
        int slot;
        int level;
        struct extent_buffer *c;
@@ -5025,7 +5060,10 @@ again:
        path->keep_locks = 1;
        path->leave_spinning = 1;
-        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+        if (time_seq)
+                ret = btrfs_search_old_slot(root, &key, path, time_seq);
+        else
+                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        path->keep_locks = 0;
        if (ret < 0)
@@ -5081,6 +5119,18 @@ again:
                if (!path->skip_locking) {
                        ret = btrfs_try_tree_read_lock(next);
+                        if (!ret && time_seq) {
+                                /*
+                                 * If we don't get the lock, we may be racing
+                                 * with push_leaf_left, holding that lock while
+                                 * itself waiting for the leaf we've currently
+                                 * locked. To solve this situation, we give up
+                                 * on our lock and cycle.
+                                 */
+                                btrfs_release_path(path);
+                                cond_resched();
+                                goto again;
+                        }
                        if (!ret) {
                                btrfs_set_path_blocking(path);
                                btrfs_tree_read_lock(next);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0236d03c6732..fa5c45b39075 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2753,13 +2753,20 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
 }
 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
-static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
+                        u64 time_seq);
+static inline int btrfs_next_old_item(struct btrfs_root *root,
+                                      struct btrfs_path *p, u64 time_seq)
 {
        ++p->slots[0];
        if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
-                return btrfs_next_leaf(root, p);
+                return btrfs_next_old_leaf(root, p, time_seq);
        return 0;
 }
+static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
+{
+        return btrfs_next_old_item(root, p, 0);
+}
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
 int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index c18d0442ae6d..2399f4086915 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
                }
        }
 }
+void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
+{
+        struct btrfs_delayed_root *delayed_root;
+        struct btrfs_delayed_node *curr_node, *prev_node;
+        delayed_root = btrfs_get_delayed_root(root);
+        curr_node = btrfs_first_delayed_node(delayed_root);
+        while (curr_node) {
+                __btrfs_kill_delayed_node(curr_node);
+                prev_node = curr_node;
+                curr_node = btrfs_next_delayed_node(curr_node);
+                btrfs_release_delayed_node(prev_node);
+        }
+}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 7083d08b2a21..f5aa4023d3e1 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);
 /* Used for drop dead root */
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
+/* Used for cleaning up the transaction */
+void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
 /* Used for readdir() */
 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
                             struct list_head *del_list);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7ae51decf6d3..2936ca49b3b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,7 @@
 #include "free-space-cache.h"
 #include "inode-map.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,
        features = btrfs_super_incompat_flags(disk_super);
        features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
-        if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+        if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
                features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
        /*
@@ -2353,12 +2354,17 @@ retry_root_backup:
                                  BTRFS_CSUM_TREE_OBJECTID, csum_root);
        if (ret)
                goto recovery_tree_root;
        csum_root->track_dirty = 1;
        fs_info->generation = generation;
        fs_info->last_trans_committed = generation;
+        ret = btrfs_recover_balance(fs_info);
+        if (ret) {
+                printk(KERN_WARNING "btrfs: failed to recover balance\n");
+                goto fail_block_groups;
+        }
        ret = btrfs_init_dev_stats(fs_info);
        if (ret) {
                printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2484,20 +2490,23 @@ retry_root_backup:
                goto fail_trans_kthread;
        }
-        if (!(sb->s_flags & MS_RDONLY)) {
+        if (sb->s_flags & MS_RDONLY)
-                down_read(&fs_info->cleanup_work_sem);
+                return 0;
-                err = btrfs_orphan_cleanup(fs_info->fs_root);
-                if (!err)
-                        err = btrfs_orphan_cleanup(fs_info->tree_root);
-                up_read(&fs_info->cleanup_work_sem);
-                if (!err)
+        down_read(&fs_info->cleanup_work_sem);
-                        err = btrfs_recover_balance(fs_info->tree_root);
+        if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+            (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+                up_read(&fs_info->cleanup_work_sem);
+                close_ctree(tree_root);
+                return ret;
+        }
+        up_read(&fs_info->cleanup_work_sem);
-                if (err) {
+        ret = btrfs_resume_balance_async(fs_info);
-                        close_ctree(tree_root);
+        if (ret) {
-                        return err;
+                printk(KERN_WARNING "btrfs: failed to resume balance\n");
-                }
+                close_ctree(tree_root);
+                return ret;
        }
        return 0;
@@ -2575,8 +2584,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
                struct btrfs_device *device = (struct btrfs_device *)
                        bh->b_private;
-                printk_ratelimited(KERN_WARNING "lost page write due to "
+                printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
-                                   "I/O error on %s\n", device->name);
+                                          "I/O error on %s\n",
+                                          rcu_str_deref(device->name));
                /* note, we dont' set_buffer_write_io_error because we have
                 * our own ways of dealing with the IO errors
                 */
@@ -2749,8 +2759,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
                wait_for_completion(&device->flush_wait);
                if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                        printk("btrfs: disabling barriers on dev %s\n",
+                        printk_in_rcu("btrfs: disabling barriers on dev %s\n",
-                               device->name);
+                                      rcu_str_deref(device->name));
                        device->nobarriers = 1;
                }
                if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3400,7 +3410,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        delayed_refs = &trans->delayed_refs;
-again:
        spin_lock(&delayed_refs->lock);
        if (delayed_refs->num_entries == 0) {
                spin_unlock(&delayed_refs->lock);
@@ -3408,31 +3417,37 @@ again:
                return ret;
        }
-        node = rb_first(&delayed_refs->root);
+        while ((node = rb_first(&delayed_refs->root)) != NULL) {
-        while (node) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-                node = rb_next(node);
-                ref->in_tree = 0;
-                rb_erase(&ref->rb_node, &delayed_refs->root);
-                delayed_refs->num_entries--;
                atomic_set(&ref->refs, 1);
                if (btrfs_delayed_ref_is_head(ref)) {
                        struct btrfs_delayed_ref_head *head;
                        head = btrfs_delayed_node_to_head(ref);
-                        spin_unlock(&delayed_refs->lock);
+                        if (!mutex_trylock(&head->mutex)) {
-                        mutex_lock(&head->mutex);
+                                atomic_inc(&ref->refs);
+                                spin_unlock(&delayed_refs->lock);
+                                /* Need to wait for the delayed ref to run */
+                                mutex_lock(&head->mutex);
+                                mutex_unlock(&head->mutex);
+                                btrfs_put_delayed_ref(ref);
+                                spin_lock(&delayed_refs->lock);
+                                continue;
+                        }
                        kfree(head->extent_op);
                        delayed_refs->num_heads--;
                        if (list_empty(&head->cluster))
                                delayed_refs->num_heads_ready--;
                        list_del_init(&head->cluster);
-                        mutex_unlock(&head->mutex);
-                        btrfs_put_delayed_ref(ref);
-                        goto again;
                }
+                ref->in_tree = 0;
+                rb_erase(&ref->rb_node, &delayed_refs->root);
+                delayed_refs->num_entries--;
                spin_unlock(&delayed_refs->lock);
                btrfs_put_delayed_ref(ref);
@@ -3520,11 +3535,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                             &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
                                               offset >> PAGE_CACHE_SHIFT);
                        spin_unlock(&dirty_pages->buffer_lock);
-                        if (eb) {
+                        if (eb)
                                ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
                                                         &eb->bflags);
-                                atomic_set(&eb->refs, 1);
-                        }
                        if (PageWriteback(page))
                                end_page_writeback(page);
@@ -3538,8 +3551,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
                                spin_unlock_irq(&page->mapping->tree_lock);
                        }
-                        page->mapping->a_ops->invalidatepage(page, 0);
                        unlock_page(page);
+                        page_cache_release(page);
                }
        }
@@ -3553,8 +3566,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
        u64 start;
        u64 end;
        int ret;
+        bool loop = true;
        unpin = pinned_extents;
+again:
        while (1) {
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY);
@@ -3572,6 +3587,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
                cond_resched();
        }
+        if (loop) {
+                if (unpin == &root->fs_info->freed_extents[0])
+                        unpin = &root->fs_info->freed_extents[1];
+                else
+                        unpin = &root->fs_info->freed_extents[0];
+                loop = false;
+                goto again;
+        }
        return 0;
 }
@@ -3585,21 +3609,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
        /* FIXME: cleanup wait for commit */
        cur_trans->in_commit = 1;
        cur_trans->blocked = 1;
-        if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
+        wake_up(&root->fs_info->transaction_blocked_wait);
-                wake_up(&root->fs_info->transaction_blocked_wait);
        cur_trans->blocked = 0;
-        if (waitqueue_active(&root->fs_info->transaction_wait))
+        wake_up(&root->fs_info->transaction_wait);
-                wake_up(&root->fs_info->transaction_wait);
        cur_trans->commit_done = 1;
-        if (waitqueue_active(&cur_trans->commit_wait))
+        wake_up(&cur_trans->commit_wait);
-                wake_up(&cur_trans->commit_wait);
+        btrfs_destroy_delayed_inodes(root);
+        btrfs_assert_delayed_root_empty(root);
        btrfs_destroy_pending_snapshots(cur_trans);
        btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
                                     EXTENT_DIRTY);
+        btrfs_destroy_pinned_extent(root,
+                                    root->fs_info->pinned_extents);
        /*
        memset(cur_trans, 0, sizeof(*cur_trans));
@@ -3648,6 +3674,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
                if (waitqueue_active(&t->commit_wait))
                        wake_up(&t->commit_wait);
+                btrfs_destroy_delayed_inodes(root);
+                btrfs_assert_delayed_root_empty(root);
                btrfs_destroy_pending_snapshots(t);
                btrfs_destroy_delalloc_inodes(root);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdefb..6e1d36702ff7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
        return count;
 }
 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
-                        unsigned long num_refs)
+                               unsigned long num_refs,
+                               struct list_head *first_seq)
 {
-        struct list_head *first_seq = delayed_refs->seq_head.next;
        spin_unlock(&delayed_refs->lock);
        pr_debug("waiting for more refs (num %ld, first %p)\n",
                 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct list_head cluster;
+        struct list_head *first_seq = NULL;
        int ret;
        u64 delayed_start;
        int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
                                 */
                                consider_waiting = 1;
                                num_refs = delayed_refs->num_entries;
+                                first_seq = root->fs_info->tree_mod_seq_list.next;
                        } else {
-                                wait_for_more_refs(delayed_refs, num_refs);
+                                wait_for_more_refs(delayed_refs,
+                                                   num_refs, first_seq);
                                /*
                                 * after waiting, things have changed. we
                                 * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2c8f7b204617..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
 #include "volumes.h"
 #include "check-integrity.h"
 #include "locking.h"
+#include "rcu-string.h"
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
@@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
                return -EIO;
        }
-        printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
+        printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
-                        "sector %llu)\n", page->mapping->host->i_ino, start,
+                      "(dev %s sector %llu)\n", page->mapping->host->i_ino,
-                        dev->name, sector);
+                      start, rcu_str_deref(dev->name), sector);
        bio_put(bio);
        return 0;
@@ -3323,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
 {
+        struct inode *inode = mapping->host;
        int ret = 0;
        int done = 0;
        int nr_to_write_done = 0;
@@ -3333,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        int scanned = 0;
        int tag;
+        /*
+         * We have to hold onto the inode so that ordered extents can do their
+         * work when the IO finishes.  The alternative to this is failing to add
+         * an ordered extent if the igrab() fails there and that is a huge pain
+         * to deal with, so instead just hold onto the inode throughout the
+         * writepages operation.  If it fails here we are freeing up the inode
+         * anyway and we'd rather not waste our time writing out stuff that is
+         * going to be truncated anyway.
+         */
+        if (!igrab(inode))
+                return 0;
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
@@ -3427,6 +3441,7 @@ retry:
                index = 0;
                goto retry;
        }
+        btrfs_add_delayed_iput(inode);
        return ret;
 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e25..9aa01ec2138d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                                    loff_t *ppos, size_t count, size_t ocount)
 {
        struct file *file = iocb->ki_filp;
-        struct inode *inode = fdentry(file)->d_inode;
        struct iov_iter i;
        ssize_t written;
        ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
        written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
                                            count, ocount);
-        /*
-         * the generic O_DIRECT will update in-memory i_size after the
-         * DIOs are done.  But our endio handlers that update the on
-         * disk i_size never update past the in memory i_size.  So we
-         * need one more update here to catch any additions to the
-         * file
-         */
-        if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-                btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-                mark_inode_dirty(inode);
-        }
        if (written < 0 || written == count)
                return written;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405a..6c4e2baa9290 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
        end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
        /*
-         * XXX - this can go away after a few releases.
+         * We need to search for bits in this bitmap.  We could only cover some
-         *
+         * of the extent in this bitmap thanks to how we add space, so we need
-         * since the only user of btrfs_remove_free_space is the tree logging
+         * to search for as much of it as we can and clear that amount, and then
-         * stuff, and the only way to test that is under crash conditions, we
+         * go searching for the next bit.
-         * want to have this debug stuff here just in case somethings not
-         * working.  Search the bitmap for the space we are trying to use to
-         * make sure its actually there.  If its not there then we need to stop
-         * because something has gone wrong.
         */
        search_start = *offset;
-        search_bytes = *bytes;
+        search_bytes = ctl->unit;
        search_bytes = min(search_bytes, end - search_start + 1);
        ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
        BUG_ON(ret < 0 || search_start != *offset);
-        if (*offset > bitmap_info->offset && *offset + *bytes > end) {
+        /* We may have found more bits than what we need */
-                bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
+        search_bytes = min(search_bytes, *bytes);
-                *bytes -= end - *offset + 1;
-                *offset = end + 1;
+        /* Cannot clear past the end of the bitmap */
-        } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
+        search_bytes = min(search_bytes, end - search_start + 1);
-                bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
-                *bytes = 0;
+        bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
-        }
+        *offset += search_bytes;
+        *bytes -= search_bytes;
        if (*bytes) {
                struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
                 * everything over again.
                 */
                search_start = *offset;
-                search_bytes = *bytes;
+                search_bytes = ctl->unit;
                ret = search_bitmap(ctl, bitmap_info, &search_start,
                                    &search_bytes);
                if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *info;
-        struct btrfs_free_space *next_info = NULL;
        int ret = 0;
        spin_lock(&ctl->tree_lock);
 again:
+        if (!bytes)
+                goto out_lock;
        info = tree_search_offset(ctl, offset, 0, 0);
        if (!info) {
                /*
@@ -1905,88 +1904,48 @@ again:
                }
        }
-        if (info->bytes < bytes && rb_next(&info->offset_index)) {
+        if (!info->bitmap) {
-                u64 end;
-                next_info = rb_entry(rb_next(&info->offset_index),
-                                             struct btrfs_free_space,
-                                             offset_index);
-                if (next_info->bitmap)
-                        end = next_info->offset +
-                              BITS_PER_BITMAP * ctl->unit - 1;
-                else
-                        end = next_info->offset + next_info->bytes;
-                if (next_info->bytes < bytes ||
-                    next_info->offset > offset || offset > end) {
-                        printk(KERN_CRIT "Found free space at %llu, size %llu,"
-                              " trying to use %llu\n",
-                              (unsigned long long)info->offset,
-                              (unsigned long long)info->bytes,
-                              (unsigned long long)bytes);
-                        WARN_ON(1);
-                        ret = -EINVAL;
-                        goto out_lock;
-                }
-                info = next_info;
-        }
-        if (info->bytes == bytes) {
                unlink_free_space(ctl, info);
-                if (info->bitmap) {
+                if (offset == info->offset) {
-                        kfree(info->bitmap);
+                        u64 to_free = min(bytes, info->bytes);
-                        ctl->total_bitmaps--;
-                }
+                        info->bytes -= to_free;
-                kmem_cache_free(btrfs_free_space_cachep, info);
+                        info->offset += to_free;
-                ret = 0;
+                        if (info->bytes) {
-                goto out_lock;
+                                ret = link_free_space(ctl, info);
-        }
+                                WARN_ON(ret);
+                        } else {
-        if (!info->bitmap && info->offset == offset) {
+                                kmem_cache_free(btrfs_free_space_cachep, info);
-                unlink_free_space(ctl, info);
+                        }
-                info->offset += bytes;
-                info->bytes -= bytes;
-                ret = link_free_space(ctl, info);
-                WARN_ON(ret);
-                goto out_lock;
-        }
-        if (!info->bitmap && info->offset <= offset &&
+                        offset += to_free;
-            info->offset + info->bytes >= offset + bytes) {
+                        bytes -= to_free;
-                u64 old_start = info->offset;
+                        goto again;
-                /*
+                } else {
-                 * we're freeing space in the middle of the info,
+                        u64 old_end = info->bytes + info->offset;
-                 * this can happen during tree log replay
-                 *
-                 * first unlink the old info and then
-                 * insert it again after the hole we're creating
-                 */
-                unlink_free_space(ctl, info);
-                if (offset + bytes < info->offset + info->bytes) {
-                        u64 old_end = info->offset + info->bytes;
-                        info->offset = offset + bytes;
+                        info->bytes = offset - info->offset;
-                        info->bytes = old_end - info->offset;
                        ret = link_free_space(ctl, info);
                        WARN_ON(ret);
                        if (ret)
                                goto out_lock;
-                } else {
-                        /* the hole we're creating ends at the end
-                         * of the info struct, just free the info
-                         */
-                        kmem_cache_free(btrfs_free_space_cachep, info);
-                }
-                spin_unlock(&ctl->tree_lock);
-                /* step two, insert a new info struct to cover
+                        /* Not enough bytes in this entry to satisfy us */
-                 * anything before the hole
+                        if (old_end < offset + bytes) {
-                 */
+                                bytes -= old_end - offset;
-                ret = btrfs_add_free_space(block_group, old_start,
+                                offset = old_end;
-                                           offset - old_start);
+                                goto again;
-                WARN_ON(ret); /* -ENOMEM */
+                        } else if (old_end == offset + bytes) {
-                goto out;
+                                /* all done */
+                                goto out_lock;
+                        }
+                        spin_unlock(&ctl->tree_lock);
+                        ret = btrfs_add_free_space(block_group, offset + bytes,
+                                                   old_end - (offset + bytes));
+                        WARN_ON(ret);
+                        goto out;
+                }
        }
        ret = remove_from_bitmap(ctl, info, &offset, &bytes);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f6ab6f5e635a..a7d1921ac76b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,
        if (IS_ERR(trans)) {
                extent_clear_unlock_delalloc(inode,
                             &BTRFS_I(inode)->io_tree,
-                             start, end, NULL,
+                             start, end, locked_page,
                             EXTENT_CLEAR_UNLOCK_PAGE |
                             EXTENT_CLEAR_UNLOCK |
                             EXTENT_CLEAR_DELALLOC |
@@ -963,7 +963,7 @@ out:
 out_unlock:
        extent_clear_unlock_delalloc(inode,
                     &BTRFS_I(inode)->io_tree,
-                     start, end, NULL,
+                     start, end, locked_page,
                     EXTENT_CLEAR_UNLOCK_PAGE |
                     EXTENT_CLEAR_UNLOCK |
                     EXTENT_CLEAR_DELALLOC |
@@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
        compress_file_range(async_cow->inode, async_cow->locked_page,
                            async_cow->start, async_cow->end, async_cow,
                            &num_added);
-        if (num_added == 0)
+        if (num_added == 0) {
+                btrfs_add_delayed_iput(async_cow->inode);
                async_cow->inode = NULL;
+        }
 }
 /*
@@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 {
        struct async_cow *async_cow;
        async_cow = container_of(work, struct async_cow, work);
+        if (async_cow->inode)
+                btrfs_add_delayed_iput(async_cow->inode);
        kfree(async_cow);
 }
@@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                BUG_ON(!async_cow); /* -ENOMEM */
-                async_cow->inode = inode;
+                async_cow->inode = igrab(inode);
                async_cow->root = root;
                async_cow->locked_page = locked_page;
                async_cow->start = start;
@@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        u64 ino = btrfs_ino(inode);
        path = btrfs_alloc_path();
-        if (!path)
+        if (!path) {
+                extent_clear_unlock_delalloc(inode,
+                             &BTRFS_I(inode)->io_tree,
+                             start, end, locked_page,
+                             EXTENT_CLEAR_UNLOCK_PAGE |
+                             EXTENT_CLEAR_UNLOCK |
+                             EXTENT_CLEAR_DELALLOC |
+                             EXTENT_CLEAR_DIRTY |
+                             EXTENT_SET_WRITEBACK |
+                             EXTENT_END_WRITEBACK);
                return -ENOMEM;
+        }
        nolock = btrfs_is_free_space_inode(root, inode);
@@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,
                trans = btrfs_join_transaction(root);
        if (IS_ERR(trans)) {
+                extent_clear_unlock_delalloc(inode,
+                             &BTRFS_I(inode)->io_tree,
+                             start, end, locked_page,
+                             EXTENT_CLEAR_UNLOCK_PAGE |
+                             EXTENT_CLEAR_UNLOCK |
+                             EXTENT_CLEAR_DELALLOC |
+                             EXTENT_CLEAR_DIRTY |
+                             EXTENT_SET_WRITEBACK |
+                             EXTENT_END_WRITEBACK);
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }
@@ -1327,8 +1350,11 @@ out_check:
        }
        btrfs_release_path(path);
-        if (cur_offset <= end && cow_start == (u64)-1)
+        if (cur_offset <= end && cow_start == (u64)-1) {
                cow_start = cur_offset;
+                cur_offset = end;
+        }
        if (cow_start != (u64)-1) {
                ret = cow_file_range(inode, locked_page, cow_start, end,
                                     page_started, nr_written, 1);
@@ -1347,6 +1373,17 @@ error:
        if (!ret)
                ret = err;
+        if (ret && cur_offset < end)
+                extent_clear_unlock_delalloc(inode,
+                             &BTRFS_I(inode)->io_tree,
+                             cur_offset, end, locked_page,
+                             EXTENT_CLEAR_UNLOCK_PAGE |
+                             EXTENT_CLEAR_UNLOCK |
+                             EXTENT_CLEAR_DELALLOC |
+                             EXTENT_CLEAR_DIRTY |
+                             EXTENT_SET_WRITEBACK |
+                             EXTENT_END_WRITEBACK);
        btrfs_free_path(path);
        return ret;
 }
@@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
        int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)
+        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 1, nr_written);
-        else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
+        } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
-        else if (!btrfs_test_opt(root, COMPRESS) &&
+        } else if (!btrfs_test_opt(root, COMPRESS) &&
-                 !(BTRFS_I(inode)->force_compress) &&
+                   !(BTRFS_I(inode)->force_compress) &&
-                 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
+                   !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
                ret = cow_file_range(inode, locked_page, start, end,
                                      page_started, nr_written, 1);
-        else
+        } else {
+                set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                        &BTRFS_I(inode)->runtime_flags);
                ret = cow_file_range_async(inode, locked_page, start, end,
                                           page_started, nr_written);
+        }
        return ret;
 }
@@ -3714,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
        if (root->fs_info->log_root_recovering) {
-                BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+                BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                                 &BTRFS_I(inode)->runtime_flags));
                goto no_delete;
        }
@@ -5836,8 +5876,17 @@ map:
        bh_result->b_size = len;
        bh_result->b_bdev = em->bdev;
        set_buffer_mapped(bh_result);
-        if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+        if (create) {
-                set_buffer_new(bh_result);
+                if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                        set_buffer_new(bh_result);
+                /*
+                 * Need to update the i_size under the extent lock so buffered
+                 * readers will get the updated i_size when we unlock.
+                 */
+                if (start + len > i_size_read(inode))
+                        i_size_write(inode, start + len);
+        }
        free_extent_map(em);
@@ -6320,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                 */
                ordered = btrfs_lookup_ordered_range(inode, lockstart,
                                                     lockend - lockstart + 1);
-                if (!ordered)
+                /*
+                 * We need to make sure there are no buffered pages in this
+                 * range either, we could have raced between the invalidate in
+                 * generic_file_direct_write and locking the extent.  The
+                 * invalidate needs to happen so that reads after a write do not
+                 * get stale data.
+                 */
+                if (!ordered && (!writing ||
+                    !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                    lockstart, lockend, EXTENT_UPTODATE, 0,
+                                    cached_state)))
                        break;
                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                                     &cached_state, GFP_NOFS);
-                btrfs_start_ordered_extent(inode, ordered, 1);
-                btrfs_put_ordered_extent(ordered);
+                if (ordered) {
+                        btrfs_start_ordered_extent(inode, ordered, 1);
+                        btrfs_put_ordered_extent(ordered);
+                } else {
+                        /* Screw you mmap */
+                        ret = filemap_write_and_wait_range(file->f_mapping,
+                                                           lockstart,
+                                                           lockend);
+                        if (ret)
+                                goto out;
+                        /*
+                         * If we found a page that couldn't be invalidated just
+                         * fall back to buffered.
+                         */
+                        ret = invalidate_inode_pages2_range(file->f_mapping,
+                                        lockstart >> PAGE_CACHE_SHIFT,
+                                        lockend >> PAGE_CACHE_SHIFT);
+                        if (ret) {
+                                if (ret == -EBUSY)
+                                        ret = 0;
+                                goto out;
+                        }
+                }
                cond_resched();
        }
@@ -7054,10 +7139,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)
        else
                b_inode->flags &= ~BTRFS_INODE_NODATACOW;
-        if (b_dir->flags & BTRFS_INODE_COMPRESS)
+        if (b_dir->flags & BTRFS_INODE_COMPRESS) {
                b_inode->flags |= BTRFS_INODE_COMPRESS;
-        else
+                b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
-                b_inode->flags &= ~BTRFS_INODE_COMPRESS;
+        } else {
+                b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
+                                    BTRFS_INODE_NOCOMPRESS);
+        }
 }
 static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 24b776c08d99..0e92e5763005 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -52,6 +52,7 @@
 #include "locking.h"
 #include "inode-map.h"
 #include "backref.h"
+#include "rcu-string.h"
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -785,39 +786,57 @@ none:
        return -ENOENT;
 }
-/*
+static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
- * Validaty check of prev em and next em:
- * 1) no prev/next em
- * 2) prev/next em is an hole/inline extent
- */
-static int check_adjacent_extents(struct inode *inode, struct extent_map *em)
 {
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-        struct extent_map *prev = NULL, *next = NULL;
+        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-        int ret = 0;
+        struct extent_map *em;
+        u64 len = PAGE_CACHE_SIZE;
+        /*
+         * hopefully we have this extent in the tree already, try without
+         * the full extent lock
+         */
        read_lock(&em_tree->lock);
-        prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1);
+        em = lookup_extent_mapping(em_tree, start, len);
-        next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1);
        read_unlock(&em_tree->lock);
-        if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) &&
+        if (!em) {
-            (!next || next->block_start >= EXTENT_MAP_LAST_BYTE))
+                /* get the big lock and read metadata off disk */
-                ret = 1;
+                lock_extent(io_tree, start, start + len - 1);
-        free_extent_map(prev);
+                em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-        free_extent_map(next);
+                unlock_extent(io_tree, start, start + len - 1);
+                if (IS_ERR(em))
+                        return NULL;
+        }
+        return em;
+}
+static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
+{
+        struct extent_map *next;
+        bool ret = true;
+        /* this is the last extent */
+        if (em->start + em->len >= i_size_read(inode))
+                return false;
+        next = defrag_lookup_extent(inode, em->start + em->len);
+        if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+                ret = false;
+        free_extent_map(next);
        return ret;
 }
-static int should_defrag_range(struct inode *inode, u64 start, u64 len,
+static int should_defrag_range(struct inode *inode, u64 start, int thresh,
-                               int thresh, u64 *last_len, u64 *skip,
+                               u64 *last_len, u64 *skip, u64 *defrag_end)
-                               u64 *defrag_end)
 {
-        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+        struct extent_map *em;
-        struct extent_map *em = NULL;
-        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 1;
+        bool next_mergeable = true;
        /*
         * make sure that once we start defragging an extent, we keep on
@@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
        *skip = 0;
-        /*
+        em = defrag_lookup_extent(inode, start);
-         * hopefully we have this extent in the tree already, try without
+        if (!em)
-         * the full extent lock
+                return 0;
-         */
-        read_lock(&em_tree->lock);
-        em = lookup_extent_mapping(em_tree, start, len);
-        read_unlock(&em_tree->lock);
-        if (!em) {
-                /* get the big lock and read metadata off disk */
-                lock_extent(io_tree, start, start + len - 1);
-                em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-                unlock_extent(io_tree, start, start + len - 1);
-                if (IS_ERR(em))
-                        return 0;
-        }
        /* this will cover holes, and inline extents */
        if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
@@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
                goto out;
        }
-        /* If we have nothing to merge with us, just skip. */
+        next_mergeable = defrag_check_next_extent(inode, em);
-        if (check_adjacent_extents(inode, em)) {
-                ret = 0;
-                goto out;
-        }
        /*
-         * we hit a real extent, if it is big don't bother defragging it again
+         * we hit a real extent, if it is big or the next extent is not a
+         * real extent, don't bother defragging it
         */
-        if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
+        if ((*last_len == 0 || *last_len >= thresh) &&
+            (em->len >= thresh || !next_mergeable))
                ret = 0;
 out:
        /*
         * last_len ends up being a counter of how many bytes we've defragged.
@@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        break;
                if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
-                                         PAGE_CACHE_SIZE, extent_thresh,
+                                         extent_thresh, &last_len, &skip,
-                                         &last_len, &skip, &defrag_end)) {
+                                         &defrag_end)) {
                        unsigned long next;
                        /*
                         * the should_defrag function tells us how much to skip
@@ -1304,6 +1306,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
                ret = -EINVAL;
                goto out_free;
        }
+        if (device->fs_devices && device->fs_devices->seeding) {
+                printk(KERN_INFO "btrfs: resizer unable to apply on "
+                       "seeding device %llu\n",
+                       (unsigned long long)devid);
+                ret = -EINVAL;
+                goto out_free;
+        }
        if (!strcmp(sizestr, "max"))
                new_size = device->bdev->bd_inode->i_size;
        else {
@@ -1345,8 +1355,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
        do_div(new_size, root->sectorsize);
        new_size *= root->sectorsize;
-        printk(KERN_INFO "btrfs: new size for %s is %llu\n",
+        printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
-                device->name, (unsigned long long)new_size);
+                      rcu_str_deref(device->name),
+                      (unsigned long long)new_size);
        if (new_size > old_size) {
                trans = btrfs_start_transaction(root, 0);
@@ -2264,7 +2275,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
        di_args->total_bytes = dev->total_bytes;
        memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
        if (dev->name) {
-                strncpy(di_args->path, dev->name, sizeof(di_args->path));
+                struct rcu_string *name;
+                rcu_read_lock();
+                name = rcu_dereference(dev->name);
+                strncpy(di_args->path, name->str, sizeof(di_args->path));
+                rcu_read_unlock();
                di_args->path[sizeof(di_args->path) - 1] = 0;
        } else {
                di_args->path[0] = '\0';
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724cf..e440aa653c30 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
                                   struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
 #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
                              struct btrfs_ioctl_scrub_args)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9e138cdc36c5..643335a4fe3c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
        /* start IO across the range first to instantiate any delalloc
         * extents
         */
-        filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
+        filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+        /*
+         * So with compression we will find and lock a dirty page and clear the
+         * first one as dirty, setup an async extent, and immediately return
+         * with the entire range locked but with nobody actually marked with
+         * writeback.  So we can't just filemap_write_and_wait_range() and
+         * expect it to work since it will just kick off a thread to do the
+         * actual work.  So we need to call filemap_fdatawrite_range _again_
+         * since it will wait on the page lock, which won't be unlocked until
+         * after the pages have been marked as writeback and so we're good to go
+         * from there.  We have to do this otherwise we'll miss the ordered
+         * extents and that results in badness.  Please Josef, do not think you
+         * know better and pull this out at some point in the future, it is
+         * right and you are wrong.
+         */
+        if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                     &BTRFS_I(inode)->runtime_flags))
+                filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+        filemap_fdatawait_range(inode->i_mapping, start, orig_end);
        end = orig_end;
        found = 0;
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
new file mode 100644
index 000000000000..9e111e4576d4
--- /dev/null
+++ b/fs/btrfs/rcu-string.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Red Hat.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+/*
+ * A string whose storage is released through RCU.
+ *
+ * @rcu: callback head handed to kfree_rcu() by rcu_string_free().
+ * @str: the characters, stored inline directly after the header;
+ *       rcu_string_strdup() sizes the allocation to hold them plus the NUL.
+ */
+struct rcu_string {
+        struct rcu_head rcu;
+        char str[];     /* C99 flexible array member, same layout as str[0] */
+};
+/*
+ * Duplicate the NUL-terminated string @src into a newly allocated rcu_string.
+ *
+ * @src:  string to copy; must not be NULL (it is passed to strlen()).
+ * @mask: allocation flags forwarded to kzalloc().
+ *
+ * Returns the new rcu_string, or NULL if the allocation failed.
+ */
+static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
+{
+        size_t nbytes = strlen(src) + 1;        /* include the trailing NUL */
+        struct rcu_string *copy = kzalloc(sizeof(*copy) + nbytes, mask);
+        if (copy)
+                /* nbytes was measured from src, so copy exactly that much */
+                memcpy(copy->str, src, nbytes);
+        return copy;
+}
+/*
+ * Hand @str to kfree_rcu() for freeing.  A NULL @str is ignored.
+ */
+static inline void rcu_string_free(struct rcu_string *str)
+{
+        if (!str)
+                return;
+        kfree_rcu(str, rcu);
+}
+/*
+ * printk() with the RCU read lock held around the call, so arguments built
+ * with rcu_str_deref() are evaluated inside a read-side critical section.
+ * ##__VA_ARGS__ lets the macro also be used with a format string alone.
+ */
+#define printk_in_rcu(fmt, ...) do {    \
+        rcu_read_lock();                \
+        printk(fmt, ##__VA_ARGS__);     \
+        rcu_read_unlock();              \
+} while (0)
+/*
+ * printk_ratelimited() with the RCU read lock held around the call, so
+ * arguments built with rcu_str_deref() are evaluated inside a read-side
+ * critical section.  ##__VA_ARGS__ lets the macro also be used with a
+ * format string alone.
+ */
+#define printk_ratelimited_in_rcu(fmt, ...) do {        \
+        rcu_read_lock();                                \
+        printk_ratelimited(fmt, ##__VA_ARGS__);         \
+        rcu_read_unlock();                              \
+} while (0)
+/*
+ * Evaluate to the character data of the rcu_string that @rcu_str points to,
+ * fetching the pointer with rcu_dereference().  The macro takes no lock
+ * itself.
+ */
+#define rcu_str_deref(rcu_str) ({                               \
+        struct rcu_string *__rs = rcu_dereference(rcu_str);     \
+        __rs->str;                                              \
+})
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index a38cfa4f251e..b223620cd5a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -26,6 +26,7 @@
 #include "backref.h"
 #include "extent_io.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 /*
 * This is only the first step towards a full-features scrub. It reads all
@@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
         * hold all of the paths here
         */
        for (i = 0; i < ipath->fspath->elem_cnt; ++i)
-                printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
+                printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
                        "%s, sector %llu, root %llu, inode %llu, offset %llu, "
                        "length %llu, links %u (path: %s)\n", swarn->errstr,
-                        swarn->logical, swarn->dev->name,
+                        swarn->logical, rcu_str_deref(swarn->dev->name),
                        (unsigned long long)swarn->sector, root, inum, offset,
                        min(isize - offset, (u64)PAGE_SIZE), nlink,
                        (char *)(unsigned long)ipath->fspath->val[i]);
@@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
        return 0;
 err:
-        printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
+        printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
                "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
                "resolving failed with ret=%d\n", swarn->errstr,
-                swarn->logical, swarn->dev->name,
+                swarn->logical, rcu_str_deref(swarn->dev->name),
                (unsigned long long)swarn->sector, root, inum, offset, ret);
        free_ipath(ipath);
@@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
                do {
                        ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
                                                        &ref_root, &ref_level);
-                        printk(KERN_WARNING
+                        printk_in_rcu(KERN_WARNING
                                "btrfs: %s at logical %llu on dev %s, "
                                "sector %llu: metadata %s (level %d) in tree "
-                                "%llu\n", errstr, swarn.logical, dev->name,
+                                "%llu\n", errstr, swarn.logical,
+                                rcu_str_deref(dev->name),
                                (unsigned long long)swarn.sector,
                                ref_level ? "node" : "leaf",
                                ret < 0 ? -1 : ref_level,
@@ -580,9 +582,11 @@ out:
                spin_lock(&sdev->stat_lock);
                ++sdev->stat.uncorrectable_errors;
                spin_unlock(&sdev->stat_lock);
-                printk_ratelimited(KERN_ERR
+                printk_ratelimited_in_rcu(KERN_ERR
                        "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
-                        (unsigned long long)fixup->logical, sdev->dev->name);
+                        (unsigned long long)fixup->logical,
+                        rcu_str_deref(sdev->dev->name));
        }
        btrfs_free_path(path);
@@ -936,18 +940,20 @@ corrected_error:
                        spin_lock(&sdev->stat_lock);
                        sdev->stat.corrected_errors++;
                        spin_unlock(&sdev->stat_lock);
-                        printk_ratelimited(KERN_ERR
+                        printk_ratelimited_in_rcu(KERN_ERR
                                "btrfs: fixed up error at logical %llu on dev %s\n",
-                                (unsigned long long)logical, sdev->dev->name);
+                                (unsigned long long)logical,
+                                rcu_str_deref(sdev->dev->name));
                }
        } else {
 did_not_correct_error:
                spin_lock(&sdev->stat_lock);
                sdev->stat.uncorrectable_errors++;
                spin_unlock(&sdev->stat_lock);
-                printk_ratelimited(KERN_ERR
+                printk_ratelimited_in_rcu(KERN_ERR
                        "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
-                        (unsigned long long)logical, sdev->dev->name);
+                        (unsigned long long)logical,
+                        rcu_str_deref(sdev->dev->name));
        }
 out:
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 96eb9fef7bd2..e23991574fdf 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -54,6 +54,7 @@
 #include "version.h"
 #include "export.h"
 #include "compression.h"
+#include "rcu-string.h"
 #define CREATE_TRACE_POINTS
 #include <trace/events/btrfs.h>
@@ -1186,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                if (ret)
                        goto restore;
+                ret = btrfs_resume_balance_async(fs_info);
+                if (ret)
+                        goto restore;
                sb->s_flags &= ~MS_RDONLY;
        }
@@ -1482,12 +1487,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
                                   "error %d\n", btrfs_ino(inode), ret);
 }
+/*
+ * ->show_devname handler: print the path of the named device with the lowest
+ * devid, searching this filesystem's device list and every list chained to
+ * it through ->seed.
+ *
+ * @m:    seq_file the escaped device path is written to.
+ * @root: dentry whose super block identifies the filesystem.
+ *
+ * Always returns 0.
+ */
+static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
+{
+        struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
+        struct btrfs_fs_devices *cur_devices;
+        struct btrfs_device *dev, *first_dev = NULL;
+        struct rcu_string *name;
+        mutex_lock(&fs_info->fs_devices->device_list_mutex);
+        for (cur_devices = fs_info->fs_devices; cur_devices;
+             cur_devices = cur_devices->seed) {
+                list_for_each_entry(dev, &cur_devices->devices, dev_list) {
+                        /*
+                         * A device may be on the list without a name (other
+                         * code in this driver checks for that).  It has no
+                         * path to print and name->str below would
+                         * dereference NULL, so never pick it.
+                         */
+                        if (!dev->name)
+                                continue;
+                        if (!first_dev || dev->devid < first_dev->devid)
+                                first_dev = dev;
+                }
+        }
+        if (first_dev) {
+                /* ->name is an rcu_string; read it under rcu_read_lock() */
+                rcu_read_lock();
+                name = rcu_dereference(first_dev->name);
+                seq_escape(m, name->str, " \t\n\\");
+                rcu_read_unlock();
+        } else {
+                /* no named device found on any list: nothing to show */
+                WARN_ON(1);
+        }
+        mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+        return 0;
+}
 static const struct super_operations btrfs_super_ops = {
        .drop_inode     = btrfs_drop_inode,
        .evict_inode    = btrfs_evict_inode,
        .put_super      = btrfs_put_super,
        .sync_fs        = btrfs_sync_fs,
        .show_options   = btrfs_show_options,
+        .show_devname   = btrfs_show_devname,
        .write_inode    = btrfs_write_inode,
        .dirty_inode    = btrfs_fs_dirty_inode,
        .alloc_inode    = btrfs_alloc_inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1791c6e3d834..b72b068183ec 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -100,6 +100,10 @@ loop:
                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
                cur_trans = fs_info->running_transaction;
                goto loop;
+        } else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+                spin_unlock(&root->fs_info->trans_lock);
+                kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+                return -EROFS;
        }
        atomic_set(&cur_trans->num_writers, 1);
@@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 static void cleanup_transaction(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root)
+                                struct btrfs_root *root, int err)
 {
        struct btrfs_transaction *cur_trans = trans->transaction;
        WARN_ON(trans->use_count > 1);
+        btrfs_abort_transaction(trans, root, err);
        spin_lock(&root->fs_info->trans_lock);
        list_del_init(&cur_trans->list);
+        if (cur_trans == root->fs_info->running_transaction) {
+                root->fs_info->running_transaction = NULL;
+                root->fs_info->trans_no_join = 0;
+        }
        spin_unlock(&root->fs_info->trans_lock);
        btrfs_cleanup_one_transaction(trans->transaction, root);
@@ -1526,7 +1536,7 @@ cleanup_transaction:
 //      WARN_ON(1);
        if (current->journal_info == trans)
                current->journal_info = NULL;
-        cleanup_transaction(trans, root);
+        cleanup_transaction(trans, root, ret);
        return ret;
 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511c..8abeae4224f9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
        kfree(name);
        iput(inode);
+        btrfs_run_delayed_items(trans, root);
        return ret;
 }
@@ -895,6 +897,7 @@ again:
                                ret = btrfs_unlink_inode(trans, root, dir,
                                                         inode, victim_name,
                                                         victim_name_len);
+                                btrfs_run_delayed_items(trans, root);
                        }
                        kfree(victim_name);
                        ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
                        ret = btrfs_unlink_inode(trans, root, dir, inode,
                                                 name, name_len);
                        BUG_ON(ret);
+                        btrfs_run_delayed_items(trans, root);
                        kfree(name);
                        iput(inode);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7782020996fe..ecaad40e7ef4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -35,6 +35,7 @@
 #include "volumes.h"
 #include "async-thread.h"
 #include "check-integrity.h"
+#include "rcu-string.h"
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
@@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
                device = list_entry(fs_devices->devices.next,
                                    struct btrfs_device, dev_list);
                list_del(&device->dev_list);
-                kfree(device->name);
+                rcu_string_free(device->name);
                kfree(device);
        }
        kfree(fs_devices);
@@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices;
+        struct rcu_string *name;
        u64 found_transid = btrfs_super_generation(disk_super);
-        char *name;
        fs_devices = find_fsid(disk_super->fsid);
        if (!fs_devices) {
@@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,
                memcpy(device->uuid, disk_super->dev_item.uuid,
                       BTRFS_UUID_SIZE);
                spin_lock_init(&device->io_lock);
-                device->name = kstrdup(path, GFP_NOFS);
-                if (!device->name) {
+                name = rcu_string_strdup(path, GFP_NOFS);
+                if (!name) {
                        kfree(device);
                        return -ENOMEM;
                }
+                rcu_assign_pointer(device->name, name);
                INIT_LIST_HEAD(&device->dev_alloc_list);
                /* init readahead state */
@@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
-        } else if (!device->name || strcmp(device->name, path)) {
+        } else if (!device->name || strcmp(device->name->str, path)) {
-                name = kstrdup(path, GFP_NOFS);
+                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name)
                        return -ENOMEM;
-                kfree(device->name);
+                rcu_string_free(device->name);
-                device->name = name;
+                rcu_assign_pointer(device->name, name);
                if (device->missing) {
                        fs_devices->missing_devices--;
                        device->missing = 0;
@@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        /* We have held the volume lock, it is safe to get the devices. */
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
+                struct rcu_string *name;
                device = kzalloc(sizeof(*device), GFP_NOFS);
                if (!device)
                        goto error;
-                device->name = kstrdup(orig_dev->name, GFP_NOFS);
+                /*
-                if (!device->name) {
+                 * This is ok to do without rcu read locked because we hold the
+                 * uuid mutex so nothing we touch in here is going to disappear.
+                 */
+                name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+                if (!name) {
                        kfree(device);
                        goto error;
                }
+                rcu_assign_pointer(device->name, name);
                device->devid = orig_dev->devid;
                device->work.func = pending_bios_fn;
@@ -491,7 +501,7 @@ again:
                }
                list_del_init(&device->dev_list);
                fs_devices->num_devices--;
-                kfree(device->name);
+                rcu_string_free(device->name);
                kfree(device);
        }
@@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)
        if (device->bdev)
                blkdev_put(device->bdev, device->mode);
-        kfree(device->name);
+        rcu_string_free(device->name);
        kfree(device);
 }
@@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
        mutex_lock(&fs_devices->device_list_mutex);
        list_for_each_entry(device, &fs_devices->devices, dev_list) {
                struct btrfs_device *new_device;
+                struct rcu_string *name;
                if (device->bdev)
                        fs_devices->open_devices--;
@@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
                BUG_ON(!new_device); /* -ENOMEM */
                memcpy(new_device, device, sizeof(*new_device));
-                new_device->name = kstrdup(device->name, GFP_NOFS);
-                BUG_ON(device->name && !new_device->name); /* -ENOMEM */
+                /* Safe because we are under uuid_mutex */
+                name = rcu_string_strdup(device->name->str, GFP_NOFS);
+                BUG_ON(device->name && !name); /* -ENOMEM */
+                rcu_assign_pointer(new_device->name, name);
                new_device->bdev = NULL;
                new_device->writeable = 0;
                new_device->in_fs_metadata = 0;
@@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                if (!device->name)
                        continue;
-                bdev = blkdev_get_by_path(device->name, flags, holder);
+                bdev = blkdev_get_by_path(device->name->str, flags, holder);
                if (IS_ERR(bdev)) {
-                        printk(KERN_INFO "open %s failed\n", device->name);
+                        printk(KERN_INFO "open %s failed\n", device->name->str);
                        goto error;
                }
                filemap_write_and_wait(bdev->bd_inode->i_mapping);
@@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        struct block_device *bdev;
        struct list_head *devices;
        struct super_block *sb = root->fs_info->sb;
+        struct rcu_string *name;
        u64 total_bytes;
        int seeding_dev = 0;
        int ret = 0;
@@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                goto error;
        }
-        device->name = kstrdup(device_path, GFP_NOFS);
+        name = rcu_string_strdup(device_path, GFP_NOFS);
-        if (!device->name) {
+        if (!name) {
                kfree(device);
                ret = -ENOMEM;
                goto error;
        }
+        rcu_assign_pointer(device->name, name);
        ret = find_next_devid(root, &device->devid);
        if (ret) {
-                kfree(device->name);
+                rcu_string_free(device->name);
                kfree(device);
                goto error;
        }
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
-                kfree(device->name);
+                rcu_string_free(device->name);
                kfree(device);
                ret = PTR_ERR(trans);
                goto error;
@@ -1796,7 +1812,7 @@ error_trans:
        unlock_chunks(root);
        btrfs_abort_transaction(trans, root, ret);
        btrfs_end_transaction(trans, root);
-        kfree(device->name);
+        rcu_string_free(device->name);
        kfree(device);
 error:
        blkdev_put(bdev, FMODE_EXCL);
@@ -2829,31 +2845,48 @@ out:
 static int balance_kthread(void *data)
 {
-        struct btrfs_balance_control *bctl =
+        struct btrfs_fs_info *fs_info = data;
-                        (struct btrfs_balance_control *)data;
-        struct btrfs_fs_info *fs_info = bctl->fs_info;
        int ret = 0;
        mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
-        set_balance_control(bctl);
+        if (fs_info->balance_ctl) {
-        if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
-                printk(KERN_INFO "btrfs: force skipping balance\n");
-        } else {
                printk(KERN_INFO "btrfs: continuing balance\n");
-                ret = btrfs_balance(bctl, NULL);
+                ret = btrfs_balance(fs_info->balance_ctl, NULL);
        }
        mutex_unlock(&fs_info->balance_mutex);
        mutex_unlock(&fs_info->volume_mutex);
        return ret;
 }
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 {
        struct task_struct *tsk;
+        spin_lock(&fs_info->balance_lock);
+        if (!fs_info->balance_ctl) {
+                spin_unlock(&fs_info->balance_lock);
+                return 0;
+        }
+        spin_unlock(&fs_info->balance_lock);
+        if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+                printk(KERN_INFO "btrfs: force skipping balance\n");
+                return 0;
+        }
+        tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+        if (IS_ERR(tsk))
+                return PTR_ERR(tsk);
+        return 0;
+}
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
+{
        struct btrfs_balance_control *bctl;
        struct btrfs_balance_item *item;
        struct btrfs_disk_balance_args disk_bargs;
@@ -2866,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
        if (!path)
                return -ENOMEM;
-        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
-        if (!bctl) {
-                ret = -ENOMEM;
-                goto out;
-        }
        key.objectid = BTRFS_BALANCE_OBJECTID;
        key.type = BTRFS_BALANCE_ITEM_KEY;
        key.offset = 0;
-        ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+        ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
        if (ret < 0)
-                goto out_bctl;
+                goto out;
        if (ret > 0) { /* ret = -ENOENT; */
                ret = 0;
-                goto out_bctl;
+                goto out;
+        }
+        bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+        if (!bctl) {
+                ret = -ENOMEM;
+                goto out;
        }
        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
-        bctl->fs_info = tree_root->fs_info;
+        bctl->fs_info = fs_info;
-        bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+        bctl->flags = btrfs_balance_flags(leaf, item);
+        bctl->flags |= BTRFS_BALANCE_RESUME;
        btrfs_balance_data(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2897,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
        btrfs_balance_sys(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
-        tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
+        mutex_lock(&fs_info->volume_mutex);
-        if (IS_ERR(tsk))
+        mutex_lock(&fs_info->balance_mutex);
-                ret = PTR_ERR(tsk);
-        else
-                goto out;
-out_bctl:
+        set_balance_control(bctl);
-        kfree(bctl);
+        mutex_unlock(&fs_info->balance_mutex);
+        mutex_unlock(&fs_info->volume_mutex);
 out:
        btrfs_free_path(path);
        return ret;
@@ -4045,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;
-                        if (bio->bi_rw & WRITE)
+                        if (dev->bdev) {
-                                btrfs_dev_stat_inc(dev,
+                                if (bio->bi_rw & WRITE)
-                                                   BTRFS_DEV_STAT_WRITE_ERRS);
+                                        btrfs_dev_stat_inc(dev,
-                        else
+                                                BTRFS_DEV_STAT_WRITE_ERRS);
-                                btrfs_dev_stat_inc(dev,
+                                else
-                                                   BTRFS_DEV_STAT_READ_ERRS);
+                                        btrfs_dev_stat_inc(dev,
-                        if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+                                                BTRFS_DEV_STAT_READ_ERRS);
-                                btrfs_dev_stat_inc(dev,
+                                if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
-                                                   BTRFS_DEV_STAT_FLUSH_ERRS);
+                                        btrfs_dev_stat_inc(dev,
-                        btrfs_dev_stat_print_on_error(dev);
+                                                BTRFS_DEV_STAT_FLUSH_ERRS);
+                                btrfs_dev_stat_print_on_error(dev);
+                        }
                }
        }
@@ -4204,10 +4239,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
                dev = bbio->stripes[dev_nr].dev;
                if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
+#ifdef DEBUG
+                        struct rcu_string *name;
+                        rcu_read_lock();
+                        name = rcu_dereference(dev->name);
                        pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
                                 "(%s id %llu), size=%u\n", rw,
                                 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
-                                 dev->name, dev->devid, bio->bi_size);
+                                 name->str, dev->devid, bio->bi_size);
+                        rcu_read_unlock();
+#endif
                        bio->bi_bdev = dev->bdev;
                        if (async_submit)
                                schedule_bio(root, dev, rw, bio);
@@ -4694,8 +4736,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
                key.offset = device->devid;
                ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
                if (ret) {
-                        printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
+                        printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
-                               device->name, (unsigned long long)device->devid);
+                                      rcu_str_deref(device->name),
+                                      (unsigned long long)device->devid);
                        __btrfs_reset_dev_stats(device);
                        device->dev_stats_valid = 1;
                        btrfs_release_path(path);
@@ -4747,8 +4790,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
        ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
        if (ret < 0) {
-                printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
+                printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
-                       ret, device->name);
+                              ret, rcu_str_deref(device->name));
                goto out;
        }
@@ -4757,8 +4800,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                /* need to delete old one and insert a new one */
                ret = btrfs_del_item(trans, dev_root, path);
                if (ret != 0) {
-                        printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
+                        printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
-                               device->name, ret);
+                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
                ret = 1;
@@ -4770,8 +4813,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
                ret = btrfs_insert_empty_item(trans, dev_root, path,
                                              &key, sizeof(*ptr));
                if (ret < 0) {
-                        printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
+                        printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
-                               device->name, ret);
+                                      rcu_str_deref(device->name), ret);
                        goto out;
                }
        }
@@ -4823,9 +4866,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 {
        if (!dev->dev_stats_valid)
                return;
-        printk_ratelimited(KERN_ERR
+        printk_ratelimited_in_rcu(KERN_ERR
                           "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
-                           dev->name,
+                           rcu_str_deref(dev->name),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
                           btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
@@ -4837,8 +4880,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
 {
-        printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+        printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
-               dev->name,
+               rcu_str_deref(dev->name),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
               btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 3406a88ca83e..95f6637614db 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -58,7 +58,7 @@ struct btrfs_device {
        /* the mode sent to blkdev_get */
        fmode_t mode;
-        char *name;
+        struct rcu_string *name;
        /* the internal btrfs device id */
        u64 devid;
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_balance_control *bctl,
                  struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
 int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
diff --git a/fs/buffer.c b/fs/buffer.c
index 838a9cf246bd..c7062c896d7c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1036,6 +1036,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
+        int ret;
+        struct buffer_head *bh;
        /* Size must be multiple of hard sectorsize */
        if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
                        (size < 512 || size > PAGE_SIZE))) {
@@ -1048,20 +1051,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
                return NULL;
        }
-        for (;;) {
+retry:
-                struct buffer_head * bh;
+        bh = __find_get_block(bdev, block, size);
-                int ret;
+        if (bh)
+                return bh;
+        ret = grow_buffers(bdev, block, size);
+        if (ret == 0) {
+                free_more_memory();
+                goto retry;
+        } else if (ret > 0) {
                bh = __find_get_block(bdev, block, size);
                if (bh)
                        return bh;
-                ret = grow_buffers(bdev, block, size);
-                if (ret < 0)
-                        return NULL;
-                if (ret == 0)
-                        free_more_memory();
        }
+        return NULL;
 }
 /*
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 173b1d22e59b..8b67304e4b80 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -54,7 +54,12 @@
        (CONGESTION_ON_THRESH(congestion_kb) -                          \
         (CONGESTION_ON_THRESH(congestion_kb) >> 2))
+static inline struct ceph_snap_context *page_snap_context(struct page *page)
+{
+        if (PagePrivate(page))
+                return (void *)page->private;
+        return NULL;
+}
 /*
 * Dirty a page.  Optimistically adjust accounting, on the assumption
@@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
 {
        struct inode *inode;
        struct ceph_inode_info *ci;
-        struct ceph_snap_context *snapc = (void *)page->private;
+        struct ceph_snap_context *snapc = page_snap_context(page);
        BUG_ON(!PageLocked(page));
-        BUG_ON(!page->private);
        BUG_ON(!PagePrivate(page));
        BUG_ON(!page->mapping);
@@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
        struct inode *inode = page->mapping ? page->mapping->host : NULL;
        dout("%p releasepage %p idx %lu\n", inode, page, page->index);
        WARN_ON(PageDirty(page));
-        WARN_ON(page->private);
        WARN_ON(PagePrivate(page));
        return 0;
 }
@@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        osdc = &fsc->client->osdc;
        /* verify this is a writeable snap context */
-        snapc = (void *)page->private;
+        snapc = page_snap_context(page);
        if (snapc == NULL) {
                dout("writepage %p page %p not dirty?\n", inode, page);
                goto out;
@@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        oldest = get_oldest_context(inode, &snap_size);
        if (snapc->seq > oldest->seq) {
                dout("writepage %p page %p snapc %p not writeable - noop\n",
-                     inode, page, (void *)page->private);
+                     inode, page, snapc);
                /* we should only noop if called by kswapd */
                WARN_ON((current->flags & PF_MEMALLOC) == 0);
                ceph_put_snap_context(oldest);
@@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,
                        clear_bdi_congested(&fsc->backing_dev_info,
                                            BLK_RW_ASYNC);
-                ceph_put_snap_context((void *)page->private);
+                ceph_put_snap_context(page_snap_context(page));
                page->private = 0;
                ClearPagePrivate(page);
                dout("unlocking %d %p\n", i, page);
@@ -795,7 +798,7 @@ get_more_pages:
                        }
                        /* only if matching snap context */
-                        pgsnapc = (void *)page->private;
+                        pgsnapc = page_snap_context(page);
                        if (pgsnapc->seq > snapc->seq) {
                                dout("page snapc %p %lld > oldest %p %lld\n",
                                     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -984,7 +987,7 @@ retry_locked:
        BUG_ON(!ci->i_snap_realm);
        down_read(&mdsc->snap_rwsem);
        BUG_ON(!ci->i_snap_realm->cached_context);
-        snapc = (void *)page->private;
+        snapc = page_snap_context(page);
        if (snapc && snapc != ci->i_head_snapc) {
                /*
                 * this page is already dirty in another (older) snap
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5b400730c213..4ee522b3f66f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,7 +86,31 @@ static struct {
 #endif /* CONFIG_CIFS_WEAK_PW_HASH */
 #endif /* CIFS_POSIX */
-/* Forward declarations */
+#ifdef CONFIG_HIGHMEM
+/*
+ * On arches that have high memory, kmap address space is limited. By
+ * serializing the kmap operations on those arches, we ensure that we don't
+ * end up with a bunch of threads in writeback with partially mapped page
+ * arrays, stuck waiting for kmap to come back. That situation prevents
+ * progress and can deadlock.
+ */
+static DEFINE_MUTEX(cifs_kmap_mutex);
+static inline void
+cifs_kmap_lock(void)
+{
+        mutex_lock(&cifs_kmap_mutex);
+}
+static inline void
+cifs_kmap_unlock(void)
+{
+        mutex_unlock(&cifs_kmap_mutex);
+}
+#else /* !CONFIG_HIGHMEM */
+#define cifs_kmap_lock() do { ; } while(0)
+#define cifs_kmap_unlock() do { ; } while(0)
+#endif /* CONFIG_HIGHMEM */
 /* Mark as invalid, all open files on tree connections since they
   were closed when session to server was lost */
@@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
        }
        /* marshal up the page array */
+        cifs_kmap_lock();
        len = rdata->marshal_iov(rdata, data_len);
+        cifs_kmap_unlock();
        data_len -= len;
        /* issue the read if we have any iovecs left to fill */
@@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
         * and set the iov_len properly for each one. It may also set
         * wdata->bytes too.
         */
+        cifs_kmap_lock();
        wdata->marshal_iov(iov, wdata);
+        cifs_kmap_unlock();
        cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 78db68a5cf44..94b7788c3189 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1653,24 +1653,26 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                         * If yes, we have encountered a double deliminator
                         * reset the NULL character to the deliminator
                         */
-                        if (tmp_end < end && tmp_end[1] == delim)
+                        if (tmp_end < end && tmp_end[1] == delim) {
                                tmp_end[0] = delim;
-                        /* Keep iterating until we get to a single deliminator
+                                /* Keep iterating until we get to a single
-                         * OR the end
+                                 * deliminator OR the end
-                         */
+                                 */
-                        while ((tmp_end = strchr(tmp_end, delim)) != NULL &&
+                                while ((tmp_end = strchr(tmp_end, delim))
-                               (tmp_end[1] == delim)) {
+                                        != NULL && (tmp_end[1] == delim)) {
-                                tmp_end = (char *) &tmp_end[2];
+                                                tmp_end = (char *) &tmp_end[2];
-                        }
+                                }
-                        /* Reset var options to point to next element */
+                                /* Reset var options to point to next element */
-                        if (tmp_end) {
+                                if (tmp_end) {
-                                tmp_end[0] = '\0';
+                                        tmp_end[0] = '\0';
-                                options = (char *) &tmp_end[1];
+                                        options = (char *) &tmp_end[1];
-                        } else
+                                } else
-                                /* Reached the end of the mount option string */
+                                        /* Reached the end of the mount option
-                                options = end;
+                                         * string */
+                                        options = end;
+                        }
                        /* Now build new password string */
                        temp_len = strlen(value);
@@ -3443,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
 #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
+/*
+ * On hosts with high memory, we can't currently support wsize/rsize that are
+ * larger than we can kmap at once. Cap the rsize/wsize at
+ * LAST_PKMAP * PAGE_CACHE_SIZE. We'll never be able to fill a read or write
+ * request larger than that anyway.
+ */
+#ifdef CONFIG_HIGHMEM
+#define CIFS_KMAP_SIZE_LIMIT    (LAST_PKMAP * PAGE_CACHE_SIZE)
+#else /* CONFIG_HIGHMEM */
+#define CIFS_KMAP_SIZE_LIMIT    (1<<24)
+#endif /* CONFIG_HIGHMEM */
 static unsigned int
 cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
 {
@@ -3473,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
                wsize = min_t(unsigned int, wsize,
                                server->maxBuf - sizeof(WRITE_REQ) + 4);
+        /* limit to the amount that we can kmap at once */
+        wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
        /* hard limit of CIFS_MAX_WSIZE */
        wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
@@ -3493,18 +3510,15 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
         * MS-CIFS indicates that servers are only limited by the client's
         * bufsize for reads, testing against win98se shows that it throws
         * INVALID_PARAMETER errors if you try to request too large a read.
+         * OS/2 just sends back short reads.
         *
-         * If the server advertises a MaxBufferSize of less than one page,
+         * If the server doesn't advertise CAP_LARGE_READ_X, then assume that
-         * assume that it also can't satisfy reads larger than that either.
+         * it can't handle a read request larger than its MaxBufferSize either.
-         *
-         * FIXME: Is there a better heuristic for this?
         */
        if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP))
                defsize = CIFS_DEFAULT_IOSIZE;
        else if (server->capabilities & CAP_LARGE_READ_X)
                defsize = CIFS_DEFAULT_NON_POSIX_RSIZE;
-        else if (server->maxBuf >= PAGE_CACHE_SIZE)
-                defsize = CIFSMaxBufSize;
        else
                defsize = server->maxBuf - sizeof(READ_RSP);
@@ -3517,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
        if (!(server->capabilities & CAP_LARGE_READ_X))
                rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
+        /* limit to the amount that we can kmap at once */
+        rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
        /* hard limit of CIFS_MAX_RSIZE */
        rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c5..a4217f02fab2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
        dentry = d_lookup(parent, name);
        if (dentry) {
-                /* FIXME: check for inode number changes? */
+                inode = dentry->d_inode;
-                if (dentry->d_inode != NULL)
+                /* update inode in place if i_ino didn't change */
+                if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
+                        cifs_fattr_to_inode(inode, fattr);
                        return dentry;
+                }
                d_drop(dentry);
                dput(dentry);
        }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3097ee58fd7d..f25d4ea14be4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
        if (mid == NULL)
                return -ENOMEM;
-        /* put it on the pending_mid_q */
-        spin_lock(&GlobalMid_Lock);
-        list_add_tail(&mid->qhead, &server->pending_mid_q);
-        spin_unlock(&GlobalMid_Lock);
        rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
-        if (rc)
+        if (rc) {
-                delete_mid(mid);
+                DeleteMidQEntry(mid);
+                return rc;
+        }
        *ret_mid = mid;
-        return rc;
+        return 0;
 }
 /*
@@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
        mid->callback_data = cbdata;
        mid->mid_state = MID_REQUEST_SUBMITTED;
+        /* put it on the pending_mid_q */
+        spin_lock(&GlobalMid_Lock);
+        list_add_tail(&mid->qhead, &server->pending_mid_q);
+        spin_unlock(&GlobalMid_Lock);
        cifs_in_send_inc(server);
        rc = smb_sendv(server, iov, nvec);
        cifs_in_send_dec(server);
        cifs_save_when_sent(mid);
        mutex_unlock(&server->srv_mutex);
-        if (rc)
+        if (rc == 0)
-                goto out_err;
+                return 0;
-        return rc;
-out_err:
        delete_mid(mid);
        add_credits(server, 1);
        wake_up(&server->request_q);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 69f994a7d524..0dbe58a8b172 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
        (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
        if (!IS_ERR(*lower_file))
                goto out;
-        if (flags & O_RDONLY) {
+        if ((flags & O_ACCMODE) == O_RDONLY) {
                rc = PTR_ERR((*lower_file));
                goto out;
        }
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 3a06f4043df4..c0038f6566d4 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
        mutex_lock(&ecryptfs_daemon_hash_mux);
        /* TODO: Just use file->private_data? */
        rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
-        BUG_ON(rc || !daemon);
+        if (rc || !daemon) {
+                mutex_unlock(&ecryptfs_daemon_hash_mux);
+                return -EINVAL;
+        }
        mutex_lock(&daemon->mux);
        mutex_unlock(&ecryptfs_daemon_hash_mux);
        if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
@@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
                goto out_unlock_daemon;
        }
        daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
+        file->private_data = daemon;
        atomic_inc(&ecryptfs_num_miscdev_opens);
 out_unlock_daemon:
        mutex_unlock(&daemon->mux);
@@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
        mutex_lock(&ecryptfs_daemon_hash_mux);
        rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
-        BUG_ON(rc || !daemon);
+        if (rc || !daemon)
+                daemon = file->private_data;
        mutex_lock(&daemon->mux);
-        BUG_ON(daemon->pid != task_pid(current));
        BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
        daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
        atomic_dec(&ecryptfs_num_miscdev_opens);
@@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
                          struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type,
                          u16 msg_flags, struct ecryptfs_daemon *daemon)
 {
-        int rc = 0;
+        struct ecryptfs_message *msg;
-        mutex_lock(&msg_ctx->mux);
+        msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL);
-        msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
+        if (!msg) {
-                               GFP_KERNEL);
-        if (!msg_ctx->msg) {
-                rc = -ENOMEM;
                printk(KERN_ERR "%s: Out of memory whilst attempting "
                       "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
-                       (sizeof(*msg_ctx->msg) + data_size));
+                       (sizeof(*msg) + data_size));
-                goto out_unlock;
+                return -ENOMEM;
        }
+        mutex_lock(&msg_ctx->mux);
+        msg_ctx->msg = msg;
        msg_ctx->msg->index = msg_ctx->index;
        msg_ctx->msg->data_len = data_size;
        msg_ctx->type = msg_type;
        memcpy(msg_ctx->msg->data, data, data_size);
        msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
-        mutex_lock(&daemon->mux);
        list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
+        mutex_unlock(&msg_ctx->mux);
+        mutex_lock(&daemon->mux);
        daemon->num_queued_msg_ctx++;
        wake_up_interruptible(&daemon->wait);
        mutex_unlock(&daemon->mux);
-out_unlock:
-        mutex_unlock(&msg_ctx->mux);
+        return 0;
-        return rc;
 }
 /*
@@ -269,8 +274,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
        mutex_lock(&ecryptfs_daemon_hash_mux);
        /* TODO: Just use file->private_data? */
        rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
-        BUG_ON(rc || !daemon);
+        if (rc || !daemon) {
+                mutex_unlock(&ecryptfs_daemon_hash_mux);
+                return -EINVAL;
+        }
        mutex_lock(&daemon->mux);
+        if (task_pid(current) != daemon->pid) {
+                mutex_unlock(&daemon->mux);
+                mutex_unlock(&ecryptfs_daemon_hash_mux);
+                return -EPERM;
+        }
        if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
                rc = 0;
                mutex_unlock(&ecryptfs_daemon_hash_mux);
@@ -307,9 +320,6 @@ check_list:
                 * message from the queue; try again */
                goto check_list;
        }
-        BUG_ON(euid != daemon->euid);
-        BUG_ON(current_user_ns() != daemon->user_ns);
-        BUG_ON(task_pid(current) != daemon->pid);
        msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue,
                                   struct ecryptfs_msg_ctx, daemon_out_list);
        BUG_ON(!msg_ctx);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 74598f67efeb..1c8b55670804 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
                goto error_tgt_fput;
        /* Check if EPOLLWAKEUP is allowed */
-        if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP))
+        if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
                epds.events &= ~EPOLLWAKEUP;
        /*
diff --git a/fs/exec.c b/fs/exec.c
index a79786a8d2c8..da27b91ff1e8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -819,10 +819,10 @@ static int exec_mmap(struct mm_struct *mm)
        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
        old_mm = current->mm;
-        sync_mm_rss(old_mm);
        mm_release(tsk, old_mm);
        if (old_mm) {
+                sync_mm_rss(old_mm);
                /*
                 * Make sure that if there is a core dump in progress
                 * for the old mm, we get out and die instead of going
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a2..24a49d47e935 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
 out:
        ios->numdevs = devs_in_group;
        ios->pages_consumed = cur_pg;
-        if (unlikely(ret)) {
+        return ret;
-                if (length == ios->length)
-                        return ret;
-                else
-                        ios->length -= length;
-        }
-        return 0;
 }
 int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1b..5f376d14fdcc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
 {
        unsigned data_devs = sp2d->data_devs;
        unsigned group_width = data_devs + sp2d->parity;
-        unsigned p;
+        int p, c;
        if (!sp2d->needed)
                return;
-        for (p = 0; p < sp2d->pages_in_unit; p++) {
+        for (c = data_devs - 1; c >= 0; --c)
-                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
+                for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
+                        struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
-                if (_1ps->write_count < group_width) {
-                        unsigned c;
-                        for (c = 0; c < data_devs; c++)
+                        if (_1ps->page_is_read[c]) {
-                                if (_1ps->page_is_read[c]) {
+                                struct page *page = _1ps->pages[c];
-                                        struct page *page = _1ps->pages[c];
-                                        r4w->put_page(priv, page);
+                                r4w->put_page(priv, page);
-                                        _1ps->page_is_read[c] = false;
+                                _1ps->page_is_read[c] = false;
-                                }
+                        }
                }
+        for (p = 0; p < sp2d->pages_in_unit; p++) {
+                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
                memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
                _1ps->write_count = 0;
                _1ps->tx = NULL;
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
 * ios->sp2d[p][*], xor is calculated the same way. These pages are
 * allocated/freed and don't go through cache
 */
-static int _read_4_write(struct ore_io_state *ios)
+static int _read_4_write_first_stripe(struct ore_io_state *ios)
 {
-        struct ore_io_state *ios_read;
        struct ore_striping_info read_si;
        struct __stripe_pages_2d *sp2d = ios->sp2d;
        u64 offset = ios->si.first_stripe_start;
-        u64 last_stripe_end;
+        unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
-        unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
-        unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
-        int ret;
        if (offset == ios->offset) /* Go to start collect $200 */
                goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
        min_p = _sp2d_min_pg(sp2d);
        max_p = _sp2d_max_pg(sp2d);
+        ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
+                   offset, ios->offset, min_p, max_p);
        for (c = 0; ; c++) {
                ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
                read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
        }
 read_last_stripe:
+        return 0;
+}
+static int _read_4_write_last_stripe(struct ore_io_state *ios)
+{
+        struct ore_striping_info read_si;
+        struct __stripe_pages_2d *sp2d = ios->sp2d;
+        u64 offset;
+        u64 last_stripe_end;
+        unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
+        unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
        offset = ios->offset + ios->length;
        if (offset % PAGE_SIZE)
                _add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
        c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
                       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
-        BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
-        /* unaligned IO must be within a single stripe */
        if (min_p == sp2d->pages_in_unit) {
                /* Didn't do it yet */
                min_p = _sp2d_min_pg(sp2d);
                max_p = _sp2d_max_pg(sp2d);
        }
+        ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
+                   offset, last_stripe_end, min_p, max_p);
        while (offset < last_stripe_end) {
                struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
@@ -568,6 +579,15 @@ read_last_stripe:
        }
 read_it:
+        return 0;
+}
+static int _read_4_write_execute(struct ore_io_state *ios)
+{
+        struct ore_io_state *ios_read;
+        unsigned i;
+        int ret;
        ios_read = ios->ios_read_4_write;
        if (!ios_read)
                return 0;
@@ -591,6 +611,8 @@ read_it:
        }
        _mark_read4write_pages_uptodate(ios_read, ret);
+        ore_put_io_state(ios_read);
+        ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
        return 0;
 }
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
                        /* If first stripe, Read in all read4write pages
                         * (if needed) before we calculate the first parity.
                         */
-                        _read_4_write(ios);
+                        _read_4_write_first_stripe(ios);
                }
+                if (!cur_len) /* If this is the last stripe, read in its r4w pages */
+                        _read_4_write_last_stripe(ios);
+                _read_4_write_execute(ios);
                for (i = 0; i < num_pages; i++) {
                        pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
 int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
 {
-        struct ore_layout *layout = ios->layout;
        if (ios->parity_pages) {
+                struct ore_layout *layout = ios->layout;
                unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
-                unsigned stripe_size = ios->si.bytes_in_stripe;
-                u64 last_stripe, first_stripe;
                if (_sp2d_alloc(pages_in_unit, layout->group_width,
                                layout->parity, &ios->sp2d)) {
                        return -ENOMEM;
                }
-                /* Round io down to last full strip */
-                first_stripe = div_u64(ios->offset, stripe_size);
-                last_stripe = div_u64(ios->offset + ios->length, stripe_size);
-                /* If an IO spans more then a single stripe it must end at
-                 * a stripe boundary. The reminder at the end is pushed into the
-                 * next IO.
-                 */
-                if (last_stripe != first_stripe) {
-                        ios->length = last_stripe * stripe_size - ios->offset;
-                        BUG_ON(!ios->length);
-                        ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
-                                        PAGE_SIZE;
-                        ios->si.length = ios->length; /*make it consistent */
-                }
        }
        return 0;
 }
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index e32bc919e4e3..5a7b691e748b 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -109,7 +109,7 @@ static struct kobj_type odev_ktype = {
 static struct kobj_type uuid_ktype = {
 };
-void exofs_sysfs_dbg_print()
+void exofs_sysfs_dbg_print(void)
 {
 #ifdef CONFIG_EXOFS_DEBUG
        struct kobject *k_name, *k_tmp;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f366..6ec6f9ee2fec 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
                err = ext4_move_extents(filp, donor_filp, me.orig_start,
                                        me.donor_start, me.len, &me.moved_len);
                mnt_drop_write_file(filp);
-                mnt_drop_write(filp->f_path.mnt);
                if (copy_to_user((struct move_extent __user *)arg,
                                 &me, sizeof(me)))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a3d81ebf6d86..0038b32cb362 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -738,22 +738,21 @@ static int
 fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
 {
        int len = *lenp;
-        u32 ipos_h, ipos_m, ipos_l;
+        struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+        loff_t i_pos;
        if (len < 5) {
                *lenp = 5;
                return 255; /* no room */
        }
-        ipos_h = MSDOS_I(inode)->i_pos >> 8;
+        i_pos = fat_i_pos_read(sbi, inode);
-        ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
-        ipos_l = (MSDOS_I(inode)->i_pos & 0x0f) << 28;
        *lenp = 5;
        fh[0] = inode->i_ino;
        fh[1] = inode->i_generation;
-        fh[2] = ipos_h;
+        fh[2] = i_pos >> 8;
-        fh[3] = ipos_m | MSDOS_I(inode)->i_logstart;
+        fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart;
-        fh[4] = ipos_l;
+        fh[4] = (i_pos & 0x0f) << 28;
        if (parent)
                fh[4] |= MSDOS_I(parent)->i_logstart;
        return 3;
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e7..cf6f4345ceb0 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
 #include <linux/sched.h>
 #include <linux/pipe_fs_i.h>
-static void wait_for_partner(struct inode* inode, unsigned int *cnt)
+static int wait_for_partner(struct inode* inode, unsigned int *cnt)
 {
        int cur = *cnt; 
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
                if (signal_pending(current))
                        break;
        }
+        return cur == *cnt ? -ERESTARTSYS : 0;
 }
 static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
                                 * seen a writer */
                                filp->f_version = pipe->w_counter;
                        } else {
-                                wait_for_partner(inode, &pipe->w_counter);
+                                if (wait_for_partner(inode, &pipe->w_counter))
-                                if(signal_pending(current))
                                        goto err_rd;
                        }
                }
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
                        wake_up_partner(inode);
                if (!pipe->readers) {
-                        wait_for_partner(inode, &pipe->r_counter);
+                        if (wait_for_partner(inode, &pipe->r_counter))
-                        if (signal_pending(current))
                                goto err_wr;
                }
                break;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8d2fb8c88cf3..41a3ccff18d8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -664,6 +664,7 @@ static long writeback_sb_inodes(struct super_block *sb,
                        /* Wait for I_SYNC. This function drops i_lock... */
                        inode_sleep_on_writeback(inode);
                        /* Inode may be gone, start again */
+                        spin_lock(&wb->list_lock);
                        continue;
                }
                inode->i_state |= I_SYNC;
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index c640ba57074b..09addc8615fa 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -31,6 +31,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
        struct hfsplus_vh *vh = sbi->s_vhdr;
        struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
+        u32 cnid = (unsigned long)dentry->d_fsdata;
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -41,8 +42,12 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
        vh->finder_info[0] = bvh->finder_info[0] =
                cpu_to_be32(parent_ino(dentry));
-        /* Bootloader */
+        /*
-        vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino);
+         * Bootloader. Just using the inode here breaks in the case of
+         * hard links - the firmware wants the ID of the hard link file,
+         * but the inode points at the indirect inode
+         */
+        vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(cnid);
        /* Per spec, the OS X system folder - same as finder_info[0] here */
        vh->finder_info[5] = bvh->finder_info[5] =
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 7daf4b852d1c..90effcccca9a 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -56,7 +56,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
        DECLARE_COMPLETION_ONSTACK(wait);
        struct bio *bio;
        int ret = 0;
-        unsigned int io_size;
+        u64 io_size;
        loff_t start;
        int offset;
diff --git a/fs/locks.c b/fs/locks.c
index 814c51d0de47..fce6238d52c1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
        case F_WRLCK:
                return generic_add_lease(filp, arg, flp);
        default:
-                BUG();
+                return -EINVAL;
        }
 }
 EXPORT_SYMBOL(generic_setlease);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 970659daa323..23ff18fe080a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,7 +17,6 @@
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/bc_xprt.h>
-#include <linux/nsproxy.h>
 #include <net/inet_sock.h>
@@ -107,7 +106,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
 {
        int ret;
-        ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET,
+        ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
                                nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
        if (ret <= 0)
                goto out_err;
@@ -115,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
        dprintk("NFS: Callback listener port = %u (af %u)\n",
                        nfs_callback_tcpport, PF_INET);
-        ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6,
+        ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
                                nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
        if (ret > 0) {
                nfs_callback_tcpport6 = ret;
@@ -184,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
         * fore channel connection.
         * Returns the input port (0) and sets the svc_serv bc_xprt on success
         */
-        ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0,
+        ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
                              SVC_SOCK_ANONYMOUS);
        if (ret < 0) {
                rqstp = ERR_PTR(ret);
@@ -254,7 +253,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
        char svc_name[12];
        int ret = 0;
        int minorversion_setup;
-        struct net *net = current->nsproxy->net_ns;
+        struct net *net = &init_net;
        mutex_lock(&nfs_callback_mutex);
        if (cb_info->users++ || cb_info->task != NULL) {
@@ -330,7 +329,7 @@ void nfs_callback_down(int minorversion)
        cb_info->users--;
        if (cb_info->users == 0 && cb_info->task != NULL) {
                kthread_stop(cb_info->task);
-                svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns);
+                svc_shutdown_net(cb_info->serv, &init_net);
                svc_exit_thread(cb_info->rqst);
                cb_info->serv = NULL;
                cb_info->rqst = NULL;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 95bfc243992c..e64b01d2a338 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
        args->csa_nrclists = ntohl(*p++);
        args->csa_rclists = NULL;
        if (args->csa_nrclists) {
-                args->csa_rclists = kmalloc(args->csa_nrclists *
+                args->csa_rclists = kmalloc_array(args->csa_nrclists,
-                                            sizeof(*args->csa_rclists),
+                                                  sizeof(*args->csa_rclists),
-                                            GFP_KERNEL);
+                                                  GFP_KERNEL);
                if (unlikely(args->csa_rclists == NULL))
                        goto out;
@@ -696,7 +696,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
                                       const struct cb_sequenceres *res)
 {
        __be32 *p;
-        unsigned status = res->csr_status;
+        __be32 status = res->csr_status;
        if (unlikely(status != 0))
                goto out;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7d108753af81..f005b5bebdc7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -207,7 +207,6 @@ error_0:
 static void nfs4_shutdown_session(struct nfs_client *clp)
 {
        if (nfs4_has_session(clp)) {
-                nfs4_deviceid_purge_client(clp);
                nfs4_destroy_session(clp->cl_session);
                nfs4_destroy_clientid(clp);
        }
@@ -544,8 +543,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
        smp_rmb();
-        BUG_ON(clp->cl_cons_state != NFS_CS_READY);
        dprintk("<-- %s found nfs_client %p for %s\n",
                __func__, clp, cl_init->hostname ?: "");
        return clp;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index ad2775d3e219..48253372ab1d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -484,17 +484,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
        list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
                if (!nfs_pageio_add_request(&desc, req)) {
+                        nfs_list_remove_request(req);
                        nfs_list_add_request(req, &failed);
                        spin_lock(cinfo.lock);
                        dreq->flags = 0;
                        dreq->error = -EIO;
                        spin_unlock(cinfo.lock);
                }
+                nfs_release_request(req);
        }
        nfs_pageio_complete(&desc);
-        while (!list_empty(&failed))
+        while (!list_empty(&failed)) {
+                req = nfs_list_entry(failed.next);
+                nfs_list_remove_request(req);
                nfs_unlock_and_release_request(req);
+        }
        if (put_dreq(dreq))
                nfs_direct_write_complete(dreq, dreq->inode);
@@ -523,9 +528,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
                nfs_list_remove_request(req);
                if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
                        /* Note the rewrite will go through mds */
-                        kref_get(&req->wb_kref);
                        nfs_mark_request_commit(req, NULL, &cinfo);
-                }
+                } else
+                        nfs_release_request(req);
                nfs_unlock_and_release_request(req);
        }
@@ -716,12 +721,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
                        if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
                                bit = NFS_IOHDR_NEED_RESCHED;
                        else if (dreq->flags == 0) {
-                                memcpy(&dreq->verf, &req->wb_verf,
+                                memcpy(&dreq->verf, hdr->verf,
                                       sizeof(dreq->verf));
                                bit = NFS_IOHDR_NEED_COMMIT;
                                dreq->flags = NFS_ODIRECT_DO_COMMIT;
                        } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-                                if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+                                if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
                                        dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
                                        bit = NFS_IOHDR_NEED_RESCHED;
                                } else
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b5b86a05059c..864c51e4b400 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,11 @@ unsigned int nfs_idmap_cache_timeout = 600;
 static const struct cred *id_resolver_cache;
 static struct key_type key_type_id_resolver_legacy;
+struct idmap {
+        struct rpc_pipe         *idmap_pipe;
+        struct key_construction *idmap_key_cons;
+        struct mutex            idmap_mutex;
+};
 /**
 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
@@ -310,9 +315,11 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
                                            name, namelen, type, data,
                                            data_size, NULL);
        if (ret < 0) {
+                mutex_lock(&idmap->idmap_mutex);
                ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
                                            name, namelen, type, data,
                                            data_size, idmap);
+                mutex_unlock(&idmap->idmap_mutex);
        }
        return ret;
 }
@@ -354,11 +361,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
 /* idmap classic begins here */
 module_param(nfs_idmap_cache_timeout, int, 0644);
-struct idmap {
-        struct rpc_pipe         *idmap_pipe;
-        struct key_construction *idmap_key_cons;
-};
 enum {
        Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
 };
@@ -469,6 +471,7 @@ nfs_idmap_new(struct nfs_client *clp)
                return error;
        }
        idmap->idmap_pipe = pipe;
+        mutex_init(&idmap->idmap_mutex);
        clp->cl_idmap = idmap;
        return 0;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e605d695dbcb..f7296983eba6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1530,7 +1530,6 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
        nfsi->delegation_state = 0;
        init_rwsem(&nfsi->rwsem);
        nfsi->layout = NULL;
-        atomic_set(&nfsi->commit_info.rpcs_out, 0);
 #endif
 }
@@ -1545,6 +1544,7 @@ static void init_once(void *foo)
        INIT_LIST_HEAD(&nfsi->commit_info.list);
        nfsi->npages = 0;
        nfsi->commit_info.ncommit = 0;
+        atomic_set(&nfsi->commit_info.rpcs_out, 0);
        atomic_set(&nfsi->silly_count, 1);
        INIT_HLIST_HEAD(&nfsi->silly_list);
        init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c6827f93ab57..cc5900ac61b5 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp)
 extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
-extern const u32 nfs4_fattr_bitmap[2];
+extern const u32 nfs4_fattr_bitmap[3];
 extern const u32 nfs4_statfs_bitmap[2];
 extern const u32 nfs4_pathconf_bitmap[2];
 extern const u32 nfs4_fsinfo_bitmap[3];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d48dbefa0e71..15fc7e4664ed 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -105,6 +105,8 @@ static int nfs4_map_errors(int err)
                return -EINVAL;
        case -NFS4ERR_SHARE_DENIED:
                return -EACCES;
+        case -NFS4ERR_MINOR_VERS_MISMATCH:
+                return -EPROTONOSUPPORT;
        default:
                dprintk("%s could not handle NFSv4 error %d\n",
                                __func__, -err);
@@ -116,7 +118,7 @@ static int nfs4_map_errors(int err)
 /*
 * This is our standard bitmap for GETATTR requests.
 */
-const u32 nfs4_fattr_bitmap[2] = {
+const u32 nfs4_fattr_bitmap[3] = {
        FATTR4_WORD0_TYPE
        | FATTR4_WORD0_CHANGE
        | FATTR4_WORD0_SIZE
@@ -133,6 +135,24 @@ const u32 nfs4_fattr_bitmap[2] = {
        | FATTR4_WORD1_TIME_MODIFY
 };
+static const u32 nfs4_pnfs_open_bitmap[3] = {
+        FATTR4_WORD0_TYPE
+        | FATTR4_WORD0_CHANGE
+        | FATTR4_WORD0_SIZE
+        | FATTR4_WORD0_FSID
+        | FATTR4_WORD0_FILEID,
+        FATTR4_WORD1_MODE
+        | FATTR4_WORD1_NUMLINKS
+        | FATTR4_WORD1_OWNER
+        | FATTR4_WORD1_OWNER_GROUP
+        | FATTR4_WORD1_RAWDEV
+        | FATTR4_WORD1_SPACE_USED
+        | FATTR4_WORD1_TIME_ACCESS
+        | FATTR4_WORD1_TIME_METADATA
+        | FATTR4_WORD1_TIME_MODIFY,
+        FATTR4_WORD2_MDSTHRESHOLD
+};
 const u32 nfs4_statfs_bitmap[2] = {
        FATTR4_WORD0_FILES_AVAIL
        | FATTR4_WORD0_FILES_FREE
@@ -844,6 +864,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->o_arg.name = &dentry->d_name;
        p->o_arg.server = server;
        p->o_arg.bitmask = server->attr_bitmask;
+        p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
        p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
        if (attrs != NULL && attrs->ia_valid != 0) {
                __be32 verf[2];
@@ -1820,6 +1841,7 @@ static int _nfs4_do_open(struct inode *dir,
                opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
                if (!opendata->f_attr.mdsthreshold)
                        goto err_opendata_put;
+                opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
        }
        if (dentry->d_inode != NULL)
                opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
@@ -1880,6 +1902,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
        struct nfs4_state *res;
        int status;
+        fmode &= FMODE_READ|FMODE_WRITE;
        do {
                status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
                                       &res, ctx_th);
@@ -2526,6 +2549,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        nfs_fattr_init(fattr);
        
+        /* Deal with open(O_TRUNC) */
+        if (sattr->ia_valid & ATTR_OPEN)
+                sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+        /* Optimization: if the end result is no change, don't RPC */
+        if ((sattr->ia_valid & ~(ATTR_FILE)) == 0)
+                return 0;
        /* Search for an existing open(O_WRITE) file */
        if (sattr->ia_valid & ATTR_FILE) {
                struct nfs_open_context *ctx;
@@ -2537,10 +2568,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                }
        }
-        /* Deal with open(O_TRUNC) */
-        if (sattr->ia_valid & ATTR_OPEN)
-                sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
        status = nfs4_do_setattr(inode, cred, fattr, sattr, state);
        if (status == 0)
                nfs_setattr_update_inode(inode, sattr);
@@ -5275,7 +5302,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        if (status)
-                pr_warn("NFS: Got error %d from the server %s on "
+                dprintk("NFS: Got error %d from the server %s on "
                        "DESTROY_CLIENTID.", status, clp->cl_hostname);
        return status;
 }
@@ -5746,8 +5773,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
        status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        if (status)
-                printk(KERN_WARNING
+                dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "
-                        "NFS: Got error %d from the server on DESTROY_SESSION. "
                        "Session has been destroyed regardless...\n", status);
        dprintk("<-- nfs4_proc_destroy_session\n");
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c679b9ecef63..f38300e9f171 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -244,6 +244,16 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)
        return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);
 }
+static void nfs41_finish_session_reset(struct nfs_client *clp)
+{
+        clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+        clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+        /* create_session negotiated new slot table */
+        clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
+        clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+        nfs41_setup_state_renewal(clp);
+}
 int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
        int status;
@@ -259,8 +269,7 @@ do_confirm:
        status = nfs4_proc_create_session(clp, cred);
        if (status != 0)
                goto out;
-        clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+        nfs41_finish_session_reset(clp);
-        nfs41_setup_state_renewal(clp);
        nfs_mark_client_ready(clp, NFS_CS_READY);
 out:
        return status;
@@ -1772,16 +1781,9 @@ static int nfs4_reset_session(struct nfs_client *clp)
                status = nfs4_handle_reclaim_lease_error(clp, status);
                goto out;
        }
-        clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+        nfs41_finish_session_reset(clp);
-        /* create_session negotiated new slot table */
-        clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
-        clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
        dprintk("%s: session reset was successful for server %s!\n",
                        __func__, clp->cl_hostname);
-         /* Let the state manager reestablish state */
-        if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
-                nfs41_setup_state_renewal(clp);
 out:
        if (cred)
                put_rpccred(cred);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ee4a74db95d0..18fae29b0301 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
 }
 static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+                                 const u32 *open_bitmap,
                                 struct compound_hdr *hdr)
 {
        encode_getattr_three(xdr,
-                             bitmask[0] & nfs4_fattr_bitmap[0],
+                             bitmask[0] & open_bitmap[0],
-                             bitmask[1] & nfs4_fattr_bitmap[1],
+                             bitmask[1] & open_bitmap[1],
-                             bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+                             bitmask[2] & open_bitmap[2],
                             hdr);
 }
@@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
        encode_putfh(xdr, args->fh, &hdr);
        encode_open(xdr, args, &hdr);
        encode_getfh(xdr, &hdr);
-        encode_getfattr_open(xdr, args->bitmask, &hdr);
+        encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
        encode_nops(&hdr);
 }
@@ -4359,7 +4360,10 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
        if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
                return -EIO;
-        if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+        if (bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD) {
+                /* Did the server return an unrequested attribute? */
+                if (unlikely(res == NULL))
+                        return -EREMOTEIO;
                p = xdr_inline_decode(xdr, 4);
                if (unlikely(!p))
                        goto out_overflow;
@@ -4372,6 +4376,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
                                __func__);
                status = decode_first_threshold_item4(xdr, res);
+                bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD;
        }
        return status;
 out_overflow:
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baebab..f50d3e8d6f22 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
        objios->ios->done = _read_done;
        dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
                rdata->args.offset, rdata->args.count);
-        return ore_read(objios->ios);
+        ret = ore_read(objios->ios);
+        if (unlikely(ret))
+                objio_free_result(&objios->oir);
+        return ret;
 }
 /*
@@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
        struct nfs_write_data *wdata = objios->oir.rpcdata;
        struct address_space *mapping = wdata->header->inode->i_mapping;
        pgoff_t index = offset / PAGE_SIZE;
-        struct page *page = find_get_page(mapping, index);
+        struct page *page;
+        loff_t i_size = i_size_read(wdata->header->inode);
+        if (offset >= i_size) {
+                *uptodate = true;
+                dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
+                return ZERO_PAGE(0);
+        }
+        page = find_get_page(mapping, index);
        if (!page) {
                page = find_or_create_page(mapping, index, GFP_NOFS);
                if (unlikely(!page)) {
@@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 static void __r4w_put_page(void *priv, struct page *page)
 {
-        dprintk("%s: index=0x%lx\n", __func__, page->index);
+        dprintk("%s: index=0x%lx\n", __func__,
-        page_cache_release(page);
+                (page == ZERO_PAGE(0)) ? -1UL : page->index);
+        if (ZERO_PAGE(0) != page)
+                page_cache_release(page);
        return;
 }
@@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
        dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
                wdata->args.offset, wdata->args.count);
        ret = ore_write(objios->ios);
-        if (unlikely(ret))
+        if (unlikely(ret)) {
+                objio_free_result(&objios->oir);
                return ret;
+        }
        if (objios->sync)
                _write_done(objios->ios, objios);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b8323aa7b543..bbc49caa7a82 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -70,6 +70,10 @@ find_pnfs_driver(u32 id)
        spin_lock(&pnfs_spinlock);
        local = find_pnfs_driver_locked(id);
+        if (local != NULL && !try_module_get(local->owner)) {
+                dprintk("%s: Could not grab reference on module\n", __func__);
+                local = NULL;
+        }
        spin_unlock(&pnfs_spinlock);
        return local;
 }
@@ -80,6 +84,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
        if (nfss->pnfs_curr_ld) {
                if (nfss->pnfs_curr_ld->clear_layoutdriver)
                        nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
+                /* Decrement the MDS count. Purge the deviceid cache if zero */
+                if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
+                        nfs4_deviceid_purge_client(nfss->nfs_client);
                module_put(nfss->pnfs_curr_ld->owner);
        }
        nfss->pnfs_curr_ld = NULL;
@@ -115,10 +122,6 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
                        goto out_no_driver;
                }
        }
-        if (!try_module_get(ld_type->owner)) {
-                dprintk("%s: Could not grab reference on module\n", __func__);
-                goto out_no_driver;
-        }
        server->pnfs_curr_ld = ld_type;
        if (ld_type->set_layoutdriver
            && ld_type->set_layoutdriver(server, mntfh)) {
@@ -127,6 +130,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
                module_put(ld_type->owner);
                goto out_no_driver;
        }
+        /* Bump the MDS count */
+        atomic_inc(&server->nfs_client->cl_mds_count);
        dprintk("%s: pNFS module for %u set\n", __func__, id);
        return;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 29fd23c0efdc..64f90d845f6a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -365,7 +365,7 @@ static inline bool
 pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,
                   struct nfs_server *nfss)
 {
-        return (dst && src && src->bm != 0 &&
+        return (dst && src && src->bm != 0 && nfss->pnfs_curr_ld &&
                                        nfss->pnfs_curr_ld->id == src->l_type);
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index a706b6bcc286..617c7419a08e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -651,7 +651,7 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
                /* Emulate the eof flag, which isn't normally needed in NFSv2
                 * as it is guaranteed to always return the file attributes
                 */
-                if (data->args.offset + data->args.count >= data->res.fattr->size)
+                if (data->args.offset + data->res.count >= data->res.fattr->size)
                        data->res.eof = 1;
        }
        return 0;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ff656c022684..06228192f64e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,
        if (data == NULL)
                goto out_no_data;
+        args->version = NFS_DEFAULT_VERSION;
        switch (data->version) {
        case 1:
                data->namlen = 0;
@@ -2637,6 +2638,8 @@ static int nfs4_validate_mount_data(void *options,
        if (data == NULL)
                goto out_no_data;
+        args->version = 4;
        switch (data->version) {
        case 1:
                if (data->host_addrlen > sizeof(args->nfs_server.address))
@@ -2857,6 +2860,8 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
        dfprintk(MOUNT, "--> nfs4_try_mount()\n");
+        mount_info->fill_super = nfs4_fill_super;
        export_path = data->nfs_server.export_path;
        data->nfs_server.export_path = "/";
        root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e6fe3d69d14c..4d6861c0dc14 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
                INIT_LIST_HEAD(&hdr->rpc_list);
                spin_lock_init(&hdr->lock);
                atomic_set(&hdr->refcnt, 0);
+                hdr->verf = &p->verf;
        }
        return p;
 }
@@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
                        goto next;
                }
                if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+                        memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf));
                        nfs_mark_request_commit(req, hdr->lseg, &cinfo);
                        goto next;
                }
@@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata)
        struct nfs_write_data   *data = calldata;
        struct nfs_pgio_header *hdr = data->header;
        int status = data->task.tk_status;
-        struct nfs_page *req = hdr->req;
        if ((status >= 0) && nfs_write_need_commit(data)) {
                spin_lock(&hdr->lock);
                if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
                        ; /* Do nothing */
                else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-                        memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+                        memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
-                else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+                else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
                        set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
                spin_unlock(&hdr->lock);
        }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8fdc9ec5c5d3..94effd5bc4a1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -900,7 +900,7 @@ static void free_session(struct kref *kref)
        struct nfsd4_session *ses;
        int mem;
-        BUG_ON(!spin_is_locked(&client_lock));
+        lockdep_assert_held(&client_lock);
        ses = container_of(kref, struct nfsd4_session, se_ref);
        nfsd4_del_conns(ses);
        spin_lock(&nfsd_drc_lock);
@@ -1080,7 +1080,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 static inline void
 free_client(struct nfs4_client *clp)
 {
-        BUG_ON(!spin_is_locked(&client_lock));
+        lockdep_assert_held(&client_lock);
        while (!list_empty(&clp->cl_sessions)) {
                struct nfsd4_session *ses;
                ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 08a07a218d26..57ceaf33d177 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
        while (!list_empty(head)) {
                ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
                list_del_init(&ii->i_dirty);
+                truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+                nilfs_btnode_cache_clear(&ii->i_btnode_cache);
                iput(&ii->vfs_inode);
        }
 }
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0e72ad6f22aa..88e11fb346b6 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
                if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
                        continue;
                list_del_init(&ii->i_dirty);
+                truncate_inode_pages(&ii->vfs_inode.i_data, 0);
+                nilfs_btnode_cache_clear(&ii->i_btnode_cache);
                iput(&ii->vfs_inode);
        }
 }
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 81a4cd22f80b..4f7795fb5fc0 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -456,7 +456,7 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
        stats->ls_gets++;
        stats->ls_total += ktime_to_ns(kt);
        /* overflow */
-        if (unlikely(stats->ls_gets) == 0) {
+        if (unlikely(stats->ls_gets == 0)) {
                stats->ls_gets++;
                stats->ls_total = ktime_to_ns(kt);
        }
@@ -3932,6 +3932,8 @@ unqueue:
 static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
                                        struct ocfs2_lock_res *lockres)
 {
+        unsigned long flags;
        assert_spin_locked(&lockres->l_lock);
        if (lockres->l_flags & OCFS2_LOCK_FREEING) {
@@ -3945,21 +3947,22 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
        lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
-        spin_lock(&osb->dc_task_lock);
+        spin_lock_irqsave(&osb->dc_task_lock, flags);
        if (list_empty(&lockres->l_blocked_list)) {
                list_add_tail(&lockres->l_blocked_list,
                              &osb->blocked_lock_list);
                osb->blocked_lock_count++;
        }
-        spin_unlock(&osb->dc_task_lock);
+        spin_unlock_irqrestore(&osb->dc_task_lock, flags);
 }
 static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
 {
        unsigned long processed;
+        unsigned long flags;
        struct ocfs2_lock_res *lockres;
-        spin_lock(&osb->dc_task_lock);
+        spin_lock_irqsave(&osb->dc_task_lock, flags);
        /* grab this early so we know to try again if a state change and
         * wake happens part-way through our work  */
        osb->dc_work_sequence = osb->dc_wake_sequence;
@@ -3972,38 +3975,40 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
                                     struct ocfs2_lock_res, l_blocked_list);
                list_del_init(&lockres->l_blocked_list);
                osb->blocked_lock_count--;
-                spin_unlock(&osb->dc_task_lock);
+                spin_unlock_irqrestore(&osb->dc_task_lock, flags);
                BUG_ON(!processed);
                processed--;
                ocfs2_process_blocked_lock(osb, lockres);
-                spin_lock(&osb->dc_task_lock);
+                spin_lock_irqsave(&osb->dc_task_lock, flags);
        }
-        spin_unlock(&osb->dc_task_lock);
+        spin_unlock_irqrestore(&osb->dc_task_lock, flags);
 }
 static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb)
 {
        int empty = 0;
+        unsigned long flags;
-        spin_lock(&osb->dc_task_lock);
+        spin_lock_irqsave(&osb->dc_task_lock, flags);
        if (list_empty(&osb->blocked_lock_list))
                empty = 1;
-        spin_unlock(&osb->dc_task_lock);
+        spin_unlock_irqrestore(&osb->dc_task_lock, flags);
        return empty;
 }
 static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
 {
        int should_wake = 0;
+        unsigned long flags;
-        spin_lock(&osb->dc_task_lock);
+        spin_lock_irqsave(&osb->dc_task_lock, flags);
        if (osb->dc_work_sequence != osb->dc_wake_sequence)
                should_wake = 1;
-        spin_unlock(&osb->dc_task_lock);
+        spin_unlock_irqrestore(&osb->dc_task_lock, flags);
        return should_wake;
 }
@@ -4033,10 +4038,12 @@ static int ocfs2_downconvert_thread(void *arg)
 void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)
 {
-        spin_lock(&osb->dc_task_lock);
+        unsigned long flags;
+        spin_lock_irqsave(&osb->dc_task_lock, flags);
        /* make sure the voting thread gets a swipe at whatever changes
         * the caller may have made to the voting state */
        osb->dc_wake_sequence++;
-        spin_unlock(&osb->dc_task_lock);
+        spin_unlock_irqrestore(&osb->dc_task_lock, flags);
        wake_up(&osb->dc_event);
 }
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2f5b92ef0e53..70b5863a2d64 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -923,8 +923,6 @@ out_unlock:
        ocfs2_inode_unlock(inode, 0);
 out:
-        if (ret && ret != -ENXIO)
-                ret = -ENXIO;
        return ret;
 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 061591a3ab08..7602783d7f41 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
        if (ret < 0)
                mlog_errno(ret);
-        if (file->f_flags & O_SYNC)
+        if (file && (file->f_flags & O_SYNC))
                handle->h_sync = 1;
        ocfs2_commit_trans(osb, handle);
@@ -2422,8 +2422,10 @@ out_dio:
                unaligned_dio = 0;
        }
-        if (unaligned_dio)
+        if (unaligned_dio) {
+                ocfs2_iocb_clear_unaligned_aio(iocb);
                atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
+        }
 out:
        if (rw_level != -1)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 92fcd575775a..0a86e302655f 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -399,8 +399,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
                              msecs_to_jiffies(oinfo->dqi_syncms));
 out_err:
-        if (status)
-                mlog_errno(status);
        return status;
 out_unlock:
        ocfs2_unlock_global_qf(oinfo, 0);
diff --git a/fs/open.c b/fs/open.c
index d6c79a0dffc7..1540632d8387 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -397,10 +397,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
 {
        struct file *file;
        struct inode *inode;
-        int error;
+        int error, fput_needed;
        error = -EBADF;
-        file = fget(fd);
+        file = fget_raw_light(fd, &fput_needed);
        if (!file)
                goto out;
@@ -414,7 +414,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
        if (!error)
                set_fs_pwd(current->fs, &file->f_path);
 out_putf:
-        fput(file);
+        fput_light(file, fput_needed);
 out:
        return error;
 }
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index aeb19e68e086..11a2aa2a56c4 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -258,7 +258,7 @@ fail:
        return rc;
 }
-int pstore_fill_super(struct super_block *sb, void *data, int silent)
+static int pstore_fill_super(struct super_block *sb, void *data, int silent)
 {
        struct inode *inode;
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 82c585f715e3..03ce7a9b81cc 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -94,20 +94,15 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
 * as we can from the end of the buffer.
 */
 static void pstore_dump(struct kmsg_dumper *dumper,
-            enum kmsg_dump_reason reason,
+                        enum kmsg_dump_reason reason)
-            const char *s1, unsigned long l1,
-            const char *s2, unsigned long l2)
 {
-        unsigned long   s1_start, s2_start;
+        unsigned long   total = 0;
-        unsigned long   l1_cpy, l2_cpy;
-        unsigned long   size, total = 0;
-        char            *dst;
        const char      *why;
        u64             id;
-        int             hsize, ret;
        unsigned int    part = 1;
        unsigned long   flags = 0;
        int             is_locked = 0;
+        int             ret;
        why = get_reason_str(reason);
@@ -119,30 +114,25 @@ static void pstore_dump(struct kmsg_dumper *dumper,
                spin_lock_irqsave(&psinfo->buf_lock, flags);
        oopscount++;
        while (total < kmsg_bytes) {
+                char *dst;
+                unsigned long size;
+                int hsize;
+                size_t len;
                dst = psinfo->buf;
                hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
                size = psinfo->bufsize - hsize;
                dst += hsize;
-                l2_cpy = min(l2, size);
+                if (!kmsg_dump_get_buffer(dumper, true, dst, size, &len))
-                l1_cpy = min(l1, size - l2_cpy);
-                if (l1_cpy + l2_cpy == 0)
                        break;
-                s2_start = l2 - l2_cpy;
-                s1_start = l1 - l1_cpy;
-                memcpy(dst, s1 + s1_start, l1_cpy);
-                memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
                ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
-                                   hsize + l1_cpy + l2_cpy, psinfo);
+                                    hsize + len, psinfo);
                if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
                        pstore_new_entry = 1;
-                l1 -= l1_cpy;
-                l2 -= l2_cpy;
+                total += hsize + len;
-                total += l1_cpy + l2_cpy;
                part++;
        }
        if (in_nmi()) {
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 9123cce28c1e..453030f9c5bc 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -106,6 +106,8 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
        time->tv_sec = 0;
        time->tv_nsec = 0;
+        /* Update old/shadowed buffer. */
+        persistent_ram_save_old(prz);
        size = persistent_ram_old_size(prz);
        *buf = kmalloc(size, GFP_KERNEL);
        if (*buf == NULL)
@@ -184,6 +186,7 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,
                return -EINVAL;
        persistent_ram_free_old(cxt->przs[id]);
+        persistent_ram_zap(cxt->przs[id]);
        return 0;
 }
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 31f8d184f3a0..c5fbdbbf81ac 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -250,23 +250,24 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
        persistent_ram_update_ecc(prz, start, count);
 }
-static void __init
+void persistent_ram_save_old(struct persistent_ram_zone *prz)
-persistent_ram_save_old(struct persistent_ram_zone *prz)
 {
        struct persistent_ram_buffer *buffer = prz->buffer;
        size_t size = buffer_size(prz);
        size_t start = buffer_start(prz);
-        char *dest;
-        persistent_ram_ecc_old(prz);
+        if (!size)
+                return;
-        dest = kmalloc(size, GFP_KERNEL);
+        if (!prz->old_log) {
-        if (dest == NULL) {
+                persistent_ram_ecc_old(prz);
+                prz->old_log = kmalloc(size, GFP_KERNEL);
+        }
+        if (!prz->old_log) {
                pr_err("persistent_ram: failed to allocate buffer\n");
                return;
        }
-        prz->old_log = dest;
        prz->old_log_size = size;
        memcpy(prz->old_log, &buffer->data[start], size - start);
        memcpy(prz->old_log + size - start, &buffer->data[0], start);
@@ -319,6 +320,13 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz)
        prz->old_log_size = 0;
 }
+void persistent_ram_zap(struct persistent_ram_zone *prz)
+{       /* Zero the zone buffer's start and size, then update the header ECC. */
+        atomic_set(&prz->buffer->start, 0);
+        atomic_set(&prz->buffer->size, 0);
+        persistent_ram_update_header_ecc(prz);  /* called after both header fields were rewritten */
+}
 static void *persistent_ram_vmap(phys_addr_t start, size_t size)
 {
        struct page **pages;
@@ -405,6 +413,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool
                                " size %zu, start %zu\n",
                               buffer_size(prz), buffer_start(prz));
                        persistent_ram_save_old(prz);
+                        return 0;
                }
        } else {
                pr_info("persistent_ram: no valid data in buffer"
@@ -412,8 +421,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool
        }
        prz->buffer->sig = PERSISTENT_RAM_SIG;
-        atomic_set(&prz->buffer->start, 0);
+        persistent_ram_zap(prz);
-        atomic_set(&prz->buffer->size, 0);
        return 0;
 }
@@ -448,7 +456,6 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,
                goto err;
        persistent_ram_post_init(prz, ecc);
-        persistent_ram_update_header_ecc(prz);
        return prz;
 err:
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index fbb0b478a346..d5378d028589 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
                /* prevent the page from being discarded on memory pressure */
                SetPageDirty(page);
+                SetPageUptodate(page);
                unlock_page(page);
                put_page(page);
diff --git a/fs/splice.c b/fs/splice.c
index c9f1318a3b82..7bf08fa22ec9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -273,13 +273,16 @@ void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 * Check if we need to grow the arrays holding pages and partial page
 * descriptions.
 */
-int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
 {
-        if (pipe->buffers <= PIPE_DEF_BUFFERS)
+        unsigned int buffers = ACCESS_ONCE(pipe->buffers);
+        spd->nr_pages_max = buffers;
+        if (buffers <= PIPE_DEF_BUFFERS)
                return 0;
-        spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
+        spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL);
-        spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
+        spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL);
        if (spd->pages && spd->partial)
                return 0;
@@ -289,10 +292,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
        return -ENOMEM;
 }
-void splice_shrink_spd(struct pipe_inode_info *pipe,
+void splice_shrink_spd(struct splice_pipe_desc *spd)
-                       struct splice_pipe_desc *spd)
 {
-        if (pipe->buffers <= PIPE_DEF_BUFFERS)
+        if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
                return;
        kfree(spd->pages);
@@ -315,6 +317,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
+                .nr_pages_max = PIPE_DEF_BUFFERS,
                .flags = flags,
                .ops = &page_cache_pipe_buf_ops,
                .spd_release = spd_release_page,
@@ -326,7 +329,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
        index = *ppos >> PAGE_CACHE_SHIFT;
        loff = *ppos & ~PAGE_CACHE_MASK;
        req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        nr_pages = min(req_pages, pipe->buffers);
+        nr_pages = min(req_pages, spd.nr_pages_max);
        /*
         * Lookup the (hopefully) full range of pages we need.
@@ -497,7 +500,7 @@ fill_it:
        if (spd.nr_pages)
                error = splice_to_pipe(pipe, &spd);
-        splice_shrink_spd(pipe, &spd);
+        splice_shrink_spd(&spd);
        return error;
 }
@@ -598,6 +601,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
+                .nr_pages_max = PIPE_DEF_BUFFERS,
                .flags = flags,
                .ops = &default_pipe_buf_ops,
                .spd_release = spd_release_page,
@@ -608,8 +612,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
        res = -ENOMEM;
        vec = __vec;
-        if (pipe->buffers > PIPE_DEF_BUFFERS) {
+        if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
-                vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
+                vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
                if (!vec)
                        goto shrink_ret;
        }
@@ -617,7 +621,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
        offset = *ppos & ~PAGE_CACHE_MASK;
        nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
+        for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
                struct page *page;
                page = alloc_page(GFP_USER);
@@ -665,7 +669,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 shrink_ret:
        if (vec != __vec)
                kfree(vec);
-        splice_shrink_spd(pipe, &spd);
+        splice_shrink_spd(&spd);
        return res;
 err:
@@ -1614,6 +1618,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
+                .nr_pages_max = PIPE_DEF_BUFFERS,
                .flags = flags,
                .ops = &user_page_pipe_buf_ops,
                .spd_release = spd_release_page,
@@ -1629,13 +1634,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
        spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
                                            spd.partial, false,
-                                            pipe->buffers);
+                                            spd.nr_pages_max);
        if (spd.nr_pages <= 0)
                ret = spd.nr_pages;
        else
                ret = splice_to_pipe(pipe, &spd);
-        splice_shrink_spd(pipe, &spd);
+        splice_shrink_spd(&spd);
        return ret;
 }
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 84a7e6f3c046..92df3b081539 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2918,7 +2918,7 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
        struct dentry *dent;
        struct ubifs_debug_info *d = c->dbg;
-        if (!IS_ENABLED(DEBUG_FS))
+        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;
        n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
@@ -3013,7 +3013,7 @@ out:
 */
 void dbg_debugfs_exit_fs(struct ubifs_info *c)
 {
-        if (IS_ENABLED(DEBUG_FS))
+        if (IS_ENABLED(CONFIG_DEBUG_FS))
                debugfs_remove_recursive(c->dbg->dfs_dir);
 }
@@ -3099,7 +3099,7 @@ int dbg_debugfs_init(void)
        const char *fname;
        struct dentry *dent;
-        if (!IS_ENABLED(DEBUG_FS))
+        if (!IS_ENABLED(CONFIG_DEBUG_FS))
                return 0;
        fname = "ubifs";
@@ -3166,7 +3166,7 @@ out:
 */
 void dbg_debugfs_exit(void)
 {
-        if (IS_ENABLED(DEBUG_FS))
+        if (IS_ENABLED(CONFIG_DEBUG_FS))
                debugfs_remove_recursive(dfs_rootdir);
 }
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 2559d174e004..28ec13af28d9 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -939,8 +939,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c)
        }
        dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
                 lp->free, lp->flags);
-        ubifs_assert(lp->flags | LPROPS_TAKEN);
+        ubifs_assert(lp->flags & LPROPS_TAKEN);
-        ubifs_assert(lp->flags | LPROPS_INDEX);
+        ubifs_assert(lp->flags & LPROPS_INDEX);
        return lnum;
 }
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ef3d1ba6d992..15e2fc5aa60b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)
                lnum = ubifs_next_log_lnum(c, lnum);
        }
-        /* Fixup the current log head */
+        /*
-        err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
+         * Fixup the log head, which contains only a CS node at the
+         * beginning.
+         */
+        err = fixup_leb(c, c->lhead_lnum,
+                        ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
        if (err)
                goto out;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index ac8a348dcb69..8d86a8706c0e 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -56,6 +56,7 @@
 #include <linux/seq_file.h>
 #include <linux/bitmap.h>
 #include <linux/crc-itu-t.h>
+#include <linux/log2.h>
 #include <asm/byteorder.h>
 #include "udf_sb.h"
@@ -1215,16 +1216,65 @@ out_bh:
        return ret;
 }
+static int udf_load_sparable_map(struct super_block *sb,
+                                 struct udf_part_map *map,
+                                 struct sparablePartitionMap *spm)
+{       /* Fill @map from the sparable partition map @spm; returns 0 or -EIO. */
+        uint32_t loc;
+        uint16_t ident;
+        struct sparingTable *st;
+        struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing;
+        int i;
+        struct buffer_head *bh;
+        map->s_partition_type = UDF_SPARABLE_MAP15;
+        sdata->s_packet_len = le16_to_cpu(spm->packetLength);
+        if (!is_power_of_2(sdata->s_packet_len)) {      /* packet length must be a power of two */
+                udf_err(sb, "error loading logical volume descriptor: "
+                        "Invalid packet length %u\n",
+                        (unsigned)sdata->s_packet_len);
+                return -EIO;
+        }
+        if (spm->numSparingTables > 4) {        /* NOTE(review): 4 presumably matches the s_spar_map[] capacity - confirm */
+                udf_err(sb, "error loading logical volume descriptor: "
+                        "Too many sparing tables (%d)\n",
+                        (int)spm->numSparingTables);
+                return -EIO;
+        }
+        for (i = 0; i < spm->numSparingTables; i++) {
+                loc = le32_to_cpu(spm->locSparingTable[i]);
+                bh = udf_read_tagged(sb, loc, loc, &ident);
+                if (!bh)
+                        continue;       /* no buffer returned: s_spar_map[i] is not written */
+                st = (struct sparingTable *)bh->b_data;
+                if (ident != 0 ||
+                    strncmp(st->sparingIdent.ident, UDF_ID_SPARING,
+                            strlen(UDF_ID_SPARING)) ||
+                    sizeof(*st) + le16_to_cpu(st->reallocationTableLen) >
+                                                        sb->s_blocksize) {      /* NOTE(review): adds reallocationTableLen as bytes - confirm it is not an entry count */
+                        brelse(bh);
+                        continue;       /* rejected table: buffer released, s_spar_map[i] is not written */
+                }
+                sdata->s_spar_map[i] = bh;      /* accepted table: buffer is stored without being released */
+        }
+        map->s_partition_func = udf_get_pblock_spar15;
+        return 0;       /* rejected or missing tables do not fail the load */
+}
 static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                               struct kernel_lb_addr *fileset)
 {
        struct logicalVolDesc *lvd;
-        int i, j, offset;
+        int i, offset;
        uint8_t type;
        struct udf_sb_info *sbi = UDF_SB(sb);
        struct genericPartitionMap *gpm;
        uint16_t ident;
        struct buffer_head *bh;
+        unsigned int table_len;
        int ret = 0;
        bh = udf_read_tagged(sb, block, block, &ident);
@@ -1232,15 +1282,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                return 1;
        BUG_ON(ident != TAG_IDENT_LVD);
        lvd = (struct logicalVolDesc *)bh->b_data;
+        table_len = le32_to_cpu(lvd->mapTableLength);
-        i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
+        if (sizeof(*lvd) + table_len > sb->s_blocksize) {
-        if (i != 0) {
+                udf_err(sb, "error loading logical volume descriptor: "
-                ret = i;
+                        "Partition table too long (%u > %lu)\n", table_len,
+                        sb->s_blocksize - sizeof(*lvd));
                goto out_bh;
        }
+        ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps));
+        if (ret)
+                goto out_bh;
        for (i = 0, offset = 0;
-             i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength);
+             i < sbi->s_partitions && offset < table_len;
             i++, offset += gpm->partitionMapLength) {
                struct udf_part_map *map = &sbi->s_partmaps[i];
                gpm = (struct genericPartitionMap *)
@@ -1275,38 +1330,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
                        } else if (!strncmp(upm2->partIdent.ident,
                                                UDF_ID_SPARABLE,
                                                strlen(UDF_ID_SPARABLE))) {
-                                uint32_t loc;
+                                if (udf_load_sparable_map(sb, map,
-                                struct sparingTable *st;
+                                    (struct sparablePartitionMap *)gpm) < 0)
-                                struct sparablePartitionMap *spm =
+                                        goto out_bh;
-                                        (struct sparablePartitionMap *)gpm;
-                                map->s_partition_type = UDF_SPARABLE_MAP15;
-                                map->s_type_specific.s_sparing.s_packet_len =
-                                                le16_to_cpu(spm->packetLength);
-                                for (j = 0; j < spm->numSparingTables; j++) {
-                                        struct buffer_head *bh2;
-                                        loc = le32_to_cpu(
-                                                spm->locSparingTable[j]);
-                                        bh2 = udf_read_tagged(sb, loc, loc,
-                                                             &ident);
-                                        map->s_type_specific.s_sparing.
-                                                        s_spar_map[j] = bh2;
-                                        if (bh2 == NULL)
-                                                continue;
-                                        st = (struct sparingTable *)bh2->b_data;
-                                        if (ident != 0 || strncmp(
-                                                st->sparingIdent.ident,
-                                                UDF_ID_SPARING,
-                                                strlen(UDF_ID_SPARING))) {
-                                                brelse(bh2);
-                                                map->s_type_specific.s_sparing.
-                                                        s_spar_map[j] = NULL;
-                                        }
-                                }
-                                map->s_partition_func = udf_get_pblock_spar15;
                        } else if (!strncmp(upm2->partIdent.ident,
                                                UDF_ID_METADATA,
                                                strlen(UDF_ID_METADATA))) {
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 229641fb8e67..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,12 +1074,13 @@ restart:
         * If we couldn't get anything, give up.
         */
        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                if (!forced++) {
                        trace_xfs_alloc_near_busy(args);
                        xfs_log_force(args->mp, XFS_LOG_SYNC);
                        goto restart;
                }
                trace_xfs_alloc_size_neither(args);
                args->agbno = NULLAGBLOCK;
                return 0;
@@ -2433,15 +2434,24 @@ xfs_alloc_vextent_worker(
        current_restore_flags_nested(&pflags, PF_FSTRANS);
 }
+/*
-int                             /* error */
+ * Data allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Metadata
+ * requests, OTOH, are generally from low stack usage paths, so avoid the
+ * context switch overhead here.
+ */
+int
 xfs_alloc_vextent(
-        xfs_alloc_arg_t *args)  /* allocation argument structure */
+        struct xfs_alloc_arg    *args)
 {
        DECLARE_COMPLETION_ONSTACK(done);
+        if (!args->userdata)
+                return __xfs_alloc_vextent(args);
        args->done = &done;
-        INIT_WORK(&args->work, xfs_alloc_vextent_worker);
+        INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
        queue_work(xfs_alloc_wq, &args->work);
        wait_for_completion(&done);
        return args->result;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index ae31c313a79e..8dad722c0041 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -981,10 +981,15 @@ xfs_vm_writepage(
                                imap_valid = 0;
                        }
                } else {
-                        if (PageUptodate(page)) {
+                        if (PageUptodate(page))
                                ASSERT(buffer_mapped(bh));
-                                imap_valid = 0;
+                        /*
-                        }
+                         * This buffer is not uptodate and will not be
+                         * written to disk.  Ensure that we will put any
+                         * subsequent writeable buffers into a new
+                         * ioend.
+                         */
+                        imap_valid = 0;
                        continue;
                }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 172d3cc8f8cb..269b35c084da 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -201,14 +201,7 @@ xfs_buf_alloc(
        bp->b_length = numblks;
        bp->b_io_length = numblks;
        bp->b_flags = flags;
+        bp->b_bn = blkno;
-        /*
-         * We do not set the block number here in the buffer because we have not
-         * finished initialising the buffer. We insert the buffer into the cache
-         * in this state, so this ensures that we are unable to do IO on a
-         * buffer that hasn't been fully initialised.
-         */
-        bp->b_bn = XFS_BUF_DADDR_NULL;
        atomic_set(&bp->b_pin_count, 0);
        init_waitqueue_head(&bp->b_waiters);
@@ -567,11 +560,6 @@ xfs_buf_get(
        if (bp != new_bp)
                xfs_buf_free(new_bp);
-        /*
-         * Now we have a workable buffer, fill in the block number so
-         * that we can do IO on it.
-         */
-        bp->b_bn = blkno;
        bp->b_io_length = bp->b_length;
 found:
@@ -772,7 +760,7 @@ xfs_buf_get_uncached(
        int                     error, i;
        xfs_buf_t               *bp;
-        bp = xfs_buf_alloc(target, 0, numblks, 0);
+        bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
        if (unlikely(bp == NULL))
                goto fail;
@@ -1001,27 +989,6 @@ xfs_buf_ioerror_alert(
                (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
 }
-int
-xfs_bwrite(
-        struct xfs_buf          *bp)
-{
-        int                     error;
-        ASSERT(xfs_buf_islocked(bp));
-        bp->b_flags |= XBF_WRITE;
-        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
-        xfs_bdstrat_cb(bp);
-        error = xfs_buf_iowait(bp);
-        if (error) {
-                xfs_force_shutdown(bp->b_target->bt_mount,
-                                   SHUTDOWN_META_IO_ERROR);
-        }
-        return error;
-}
 /*
 * Called when we want to stop a buffer from getting written or read.
 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1091,14 +1058,7 @@ xfs_bioerror_relse(
        return EIO;
 }
+STATIC int
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shutdown the filesystem.
- */
-int
 xfs_bdstrat_cb(
        struct xfs_buf  *bp)
 {
@@ -1119,6 +1079,27 @@ xfs_bdstrat_cb(
        return 0;
 }
+int
+xfs_bwrite(
+        struct xfs_buf          *bp)
+{
+        int                     error;
+        ASSERT(xfs_buf_islocked(bp));
+        bp->b_flags |= XBF_WRITE;
+        bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+        xfs_bdstrat_cb(bp);
+        error = xfs_buf_iowait(bp);
+        if (error) {
+                xfs_force_shutdown(bp->b_target->bt_mount,
+                                   SHUTDOWN_META_IO_ERROR);
+        }
+        return error;
+}
 /*
 * Wrapper around bdstrat so that we can stop data from going to disk in case
 * we are shutting down the filesystem.  Typically user data goes thru this
@@ -1255,7 +1236,7 @@ xfs_buf_iorequest(
         */
        atomic_set(&bp->b_io_remaining, 1);
        _xfs_buf_ioapply(bp);
-        _xfs_buf_ioend(bp, 0);
+        _xfs_buf_ioend(bp, 1);
        xfs_buf_rele(bp);
 }
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce37..79344c48008e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
 extern int xfs_bwrite(struct xfs_buf *bp);
 extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
 extern void xfs_buf_ioend(xfs_buf_t *,  int);
 extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d48..d9e451115f98 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
                if (!XFS_BUF_ISSTALE(bp)) {
                        bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
-                        xfs_bdstrat_cb(bp);
+                        xfs_buf_iorequest(bp);
                } else {
                        xfs_buf_relse(bp);
                }
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6cdbf90c6f7b..d041d47d9d86 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -505,6 +505,14 @@ xfs_inode_item_push(
        }
        /*
+         * Stale inode items should force out the iclog.
+         */
+        if (ip->i_flags & XFS_ISTALE) {
+                rval = XFS_ITEM_PINNED;
+                goto out_unlock;
+        }
+        /*
         * Someone else is already flushing the inode.  Nothing we can do
         * here but wait for the flush to finish and remove the item from
         * the AIL.
@@ -514,15 +522,6 @@ xfs_inode_item_push(
                goto out_unlock;
        }
-        /*
-         * Stale inode items should force out the iclog.
-         */
-        if (ip->i_flags & XFS_ISTALE) {
-                xfs_ifunlock(ip);
-                xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                return XFS_ITEM_PINNED;
-        }
        ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
        ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f30d9807dc48..d90d4a388609 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -38,13 +38,21 @@
 kmem_zone_t     *xfs_log_ticket_zone;
 /* Local miscellaneous function prototypes */
-STATIC int       xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
+STATIC int
-                                    xlog_in_core_t **, xfs_lsn_t *);
+xlog_commit_record(
+        struct xlog             *log,
+        struct xlog_ticket      *ticket,
+        struct xlog_in_core     **iclog,
+        xfs_lsn_t               *commitlsnp);
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t     *mp,
                                xfs_buftarg_t   *log_target,
                                xfs_daddr_t     blk_offset,
                                int             num_bblks);
-STATIC int       xlog_space_left(struct log *log, atomic64_t *head);
+STATIC int
+xlog_space_left(
+        struct xlog             *log,
+        atomic64_t              *head);
 STATIC int       xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void      xlog_dealloc_log(xlog_t *log);
@@ -64,8 +72,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t		*log,
                                     int                eventual_size);
 STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void xlog_grant_push_ail(struct log      *log,
+STATIC void
-                                int             need_bytes);
+xlog_grant_push_ail(
+        struct xlog     *log,
+        int             need_bytes);
 STATIC void xlog_regrant_reserve_log_space(xlog_t        *log,
                                           xlog_ticket_t *ticket);
 STATIC void xlog_ungrant_log_space(xlog_t        *log,
@@ -73,7 +83,9 @@ STATIC void xlog_ungrant_log_space(xlog_t	 *log,
 #if defined(DEBUG)
 STATIC void     xlog_verify_dest_ptr(xlog_t *log, char *ptr);
-STATIC void     xlog_verify_grant_tail(struct log *log);
+STATIC void
+xlog_verify_grant_tail(
+        struct xlog     *log);
 STATIC void     xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
                                  int count, boolean_t syncing);
 STATIC void     xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
@@ -89,9 +101,9 @@ STATIC int	xlog_iclogs_empty(xlog_t *log);
 static void
 xlog_grant_sub_space(
-        struct log      *log,
+        struct xlog             *log,
-        atomic64_t      *head,
+        atomic64_t              *head,
-        int             bytes)
+        int                     bytes)
 {
        int64_t head_val = atomic64_read(head);
        int64_t new, old;
@@ -115,9 +127,9 @@ xlog_grant_sub_space(
 static void
 xlog_grant_add_space(
-        struct log      *log,
+        struct xlog             *log,
-        atomic64_t      *head,
+        atomic64_t              *head,
-        int             bytes)
+        int                     bytes)
 {
        int64_t head_val = atomic64_read(head);
        int64_t new, old;
@@ -165,7 +177,7 @@ xlog_grant_head_wake_all(
 static inline int
 xlog_ticket_reservation(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_grant_head  *head,
        struct xlog_ticket      *tic)
 {
@@ -182,7 +194,7 @@ xlog_ticket_reservation(
 STATIC bool
 xlog_grant_head_wake(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_grant_head  *head,
        int                     *free_bytes)
 {
@@ -204,7 +216,7 @@ xlog_grant_head_wake(
 STATIC int
 xlog_grant_head_wait(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_grant_head  *head,
        struct xlog_ticket      *tic,
        int                     need_bytes)
@@ -256,7 +268,7 @@ shutdown:
 */
 STATIC int
 xlog_grant_head_check(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_grant_head  *head,
        struct xlog_ticket      *tic,
        int                     *need_bytes)
@@ -323,7 +335,7 @@ xfs_log_regrant(
        struct xfs_mount        *mp,
        struct xlog_ticket      *tic)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        int                     need_bytes;
        int                     error = 0;
@@ -389,7 +401,7 @@ xfs_log_reserve(
        bool                    permanent,
        uint                    t_type)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        struct xlog_ticket      *tic;
        int                     need_bytes;
        int                     error = 0;
@@ -465,7 +477,7 @@ xfs_log_done(
        struct xlog_in_core     **iclog,
        uint                    flags)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        xfs_lsn_t               lsn = 0;
        if (XLOG_FORCED_SHUTDOWN(log) ||
@@ -810,6 +822,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 void
 xfs_log_unmount(xfs_mount_t *mp)
 {
+        cancel_delayed_work_sync(&mp->m_sync_work);
        xfs_trans_ail_destroy(mp);
        xlog_dealloc_log(mp->m_log);
 }
@@ -838,7 +851,7 @@ void
 xfs_log_space_wake(
        struct xfs_mount        *mp)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        int                     free_bytes;
        if (XLOG_FORCED_SHUTDOWN(log))
@@ -916,7 +929,7 @@ xfs_lsn_t
 xlog_assign_tail_lsn_locked(
        struct xfs_mount        *mp)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        struct xfs_log_item     *lip;
        xfs_lsn_t               tail_lsn;
@@ -965,7 +978,7 @@ xlog_assign_tail_lsn(
 */
 STATIC int
 xlog_space_left(
-        struct log      *log,
+        struct xlog     *log,
        atomic64_t      *head)
 {
        int             free_bytes;
@@ -1277,7 +1290,7 @@ out:
 */
 STATIC int
 xlog_commit_record(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_ticket      *ticket,
        struct xlog_in_core     **iclog,
        xfs_lsn_t               *commitlsnp)
@@ -1311,7 +1324,7 @@ xlog_commit_record(
 */
 STATIC void
 xlog_grant_push_ail(
-        struct log      *log,
+        struct xlog     *log,
        int             need_bytes)
 {
        xfs_lsn_t       threshold_lsn = 0;
@@ -1790,7 +1803,7 @@ xlog_write_start_rec(
 static xlog_op_header_t *
 xlog_write_setup_ophdr(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_op_header   *ophdr,
        struct xlog_ticket      *ticket,
        uint                    flags)
@@ -1873,7 +1886,7 @@ xlog_write_setup_copy(
 static int
 xlog_write_copy_finish(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_in_core     *iclog,
        uint                    flags,
        int                     *record_cnt,
@@ -1958,7 +1971,7 @@ xlog_write_copy_finish(
 */
 int
 xlog_write(
-        struct log              *log,
+        struct xlog             *log,
        struct xfs_log_vec      *log_vector,
        struct xlog_ticket      *ticket,
        xfs_lsn_t               *start_lsn,
@@ -2821,7 +2834,7 @@ _xfs_log_force(
        uint                    flags,
        int                     *log_flushed)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        struct xlog_in_core     *iclog;
        xfs_lsn_t               lsn;
@@ -2969,7 +2982,7 @@ _xfs_log_force_lsn(
        uint                    flags,
        int                     *log_flushed)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        struct xlog_in_core     *iclog;
        int                     already_slept = 0;
@@ -3147,7 +3160,7 @@ xfs_log_ticket_get(
 */
 xlog_ticket_t *
 xlog_ticket_alloc(
-        struct log      *log,
+        struct xlog     *log,
        int             unit_bytes,
        int             cnt,
        char            client,
@@ -3278,7 +3291,7 @@ xlog_ticket_alloc(
 */
 void
 xlog_verify_dest_ptr(
-        struct log      *log,
+        struct xlog     *log,
        char            *ptr)
 {
        int i;
@@ -3307,7 +3320,7 @@ xlog_verify_dest_ptr(
 */
 STATIC void
 xlog_verify_grant_tail(
-        struct log      *log)
+        struct xlog     *log)
 {
        int             tail_cycle, tail_blocks;
        int             cycle, space;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 7d6197c58493..ddc4529d07d3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -44,7 +44,7 @@
 */
 static struct xlog_ticket *
 xlog_cil_ticket_alloc(
-        struct log      *log)
+        struct xlog     *log)
 {
        struct xlog_ticket *tic;
@@ -72,7 +72,7 @@ xlog_cil_ticket_alloc(
 */
 void
 xlog_cil_init_post_recovery(
-        struct log      *log)
+        struct xlog     *log)
 {
        log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
        log->l_cilp->xc_ctx->sequence = 1;
@@ -182,7 +182,7 @@ xlog_cil_prepare_log_vecs(
 */
 STATIC void
 xfs_cil_prepare_item(
-        struct log              *log,
+        struct xlog             *log,
        struct xfs_log_vec      *lv,
        int                     *len,
        int                     *diff_iovecs)
@@ -231,7 +231,7 @@ xfs_cil_prepare_item(
 */
 static void
 xlog_cil_insert_items(
-        struct log              *log,
+        struct xlog             *log,
        struct xfs_log_vec      *log_vector,
        struct xlog_ticket      *ticket)
 {
@@ -373,7 +373,7 @@ xlog_cil_committed(
 */
 STATIC int
 xlog_cil_push(
-        struct log              *log)
+        struct xlog             *log)
 {
        struct xfs_cil          *cil = log->l_cilp;
        struct xfs_log_vec      *lv;
@@ -601,7 +601,7 @@ xlog_cil_push_work(
 */
 static void
 xlog_cil_push_background(
-        struct log      *log)
+        struct xlog     *log)
 {
        struct xfs_cil  *cil = log->l_cilp;
@@ -629,7 +629,7 @@ xlog_cil_push_background(
 static void
 xlog_cil_push_foreground(
-        struct log      *log,
+        struct xlog     *log,
        xfs_lsn_t       push_seq)
 {
        struct xfs_cil  *cil = log->l_cilp;
@@ -683,7 +683,7 @@ xfs_log_commit_cil(
        xfs_lsn_t               *commit_lsn,
        int                     flags)
 {
-        struct log              *log = mp->m_log;
+        struct xlog             *log = mp->m_log;
        int                     log_flags = 0;
        struct xfs_log_vec      *log_vector;
@@ -754,7 +754,7 @@ xfs_log_commit_cil(
 */
 xfs_lsn_t
 xlog_cil_force_lsn(
-        struct log      *log,
+        struct xlog     *log,
        xfs_lsn_t       sequence)
 {
        struct xfs_cil          *cil = log->l_cilp;
@@ -833,7 +833,7 @@ xfs_log_item_in_current_chkpt(
 */
 int
 xlog_cil_init(
-        struct log      *log)
+        struct xlog     *log)
 {
        struct xfs_cil  *cil;
        struct xfs_cil_ctx *ctx;
@@ -869,7 +869,7 @@ xlog_cil_init(
 void
 xlog_cil_destroy(
-        struct log      *log)
+        struct xlog     *log)
 {
        if (log->l_cilp->xc_ctx) {
                if (log->l_cilp->xc_ctx->ticket)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5bc33261f5be..72eba2201b14 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -19,7 +19,7 @@
 #define __XFS_LOG_PRIV_H__
 struct xfs_buf;
-struct log;
+struct xlog;
 struct xlog_ticket;
 struct xfs_mount;
@@ -352,7 +352,7 @@ typedef struct xlog_in_core {
        struct xlog_in_core     *ic_next;
        struct xlog_in_core     *ic_prev;
        struct xfs_buf          *ic_bp;
-        struct log              *ic_log;
+        struct xlog             *ic_log;
        int                     ic_size;
        int                     ic_offset;
        int                     ic_bwritecnt;
@@ -409,7 +409,7 @@ struct xfs_cil_ctx {
 * operations almost as efficient as the old logging methods.
 */
 struct xfs_cil {
-        struct log              *xc_log;
+        struct xlog             *xc_log;
        struct list_head        xc_cil;
        spinlock_t              xc_cil_lock;
        struct xfs_cil_ctx      *xc_ctx;
@@ -487,7 +487,7 @@ struct xlog_grant_head {
 * overflow 31 bits worth of byte offset, so using a byte number will mean
 * that round off problems won't occur when releasing partial reservations.
 */
-typedef struct log {
+typedef struct xlog {
        /* The following fields don't need locking */
        struct xfs_mount        *l_mp;          /* mount point */
        struct xfs_ail          *l_ailp;        /* AIL log is working with */
@@ -553,9 +553,14 @@ extern int	 xlog_recover_finish(xlog_t *log);
 extern void      xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 extern kmem_zone_t *xfs_log_ticket_zone;
-struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
+struct xlog_ticket *
-                                int count, char client, bool permanent,
+xlog_ticket_alloc(
-                                xfs_km_flags_t alloc_flags);
+        struct xlog     *log,
+        int             unit_bytes,
+        int             count,
+        char            client,
+        bool            permanent,
+        xfs_km_flags_t  alloc_flags);
 static inline void
@@ -567,9 +572,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 }
 void    xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
-int     xlog_write(struct log *log, struct xfs_log_vec *log_vector,
+int
-                                struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
+xlog_write(
-                                xlog_in_core_t **commit_iclog, uint flags);
+        struct xlog             *log,
+        struct xfs_log_vec      *log_vector,
+        struct xlog_ticket      *tic,
+        xfs_lsn_t               *start_lsn,
+        struct xlog_in_core     **commit_iclog,
+        uint                    flags);
 /*
 * When we crack an atomic LSN, we sample it first so that the value will not
@@ -629,17 +639,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
 /*
 * Committed Item List interfaces
 */
-int     xlog_cil_init(struct log *log);
+int
-void    xlog_cil_init_post_recovery(struct log *log);
+xlog_cil_init(struct xlog *log);
-void    xlog_cil_destroy(struct log *log);
+void
+xlog_cil_init_post_recovery(struct xlog *log);
+void
+xlog_cil_destroy(struct xlog *log);
 /*
 * CIL force routines
 */
-xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+xfs_lsn_t
+xlog_cil_force_lsn(
+        struct xlog *log,
+        xfs_lsn_t sequence);
 static inline void
-xlog_cil_force(struct log *log)
+xlog_cil_force(struct xlog *log)
 {
        xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ca386909131a..a7be98abd6a9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1471,8 +1471,8 @@ xlog_recover_add_item(
 STATIC int
 xlog_recover_add_to_cont_trans(
-        struct log              *log,
+        struct xlog             *log,
-        xlog_recover_t          *trans,
+        struct xlog_recover     *trans,
        xfs_caddr_t             dp,
        int                     len)
 {
@@ -1517,8 +1517,8 @@ xlog_recover_add_to_cont_trans(
 */
 STATIC int
 xlog_recover_add_to_trans(
-        struct log              *log,
+        struct xlog             *log,
-        xlog_recover_t          *trans,
+        struct xlog_recover     *trans,
        xfs_caddr_t             dp,
        int                     len)
 {
@@ -1588,8 +1588,8 @@ xlog_recover_add_to_trans(
 */
 STATIC int
 xlog_recover_reorder_trans(
-        struct log              *log,
+        struct xlog             *log,
-        xlog_recover_t          *trans,
+        struct xlog_recover     *trans,
        int                     pass)
 {
        xlog_recover_item_t     *item, *n;
@@ -1642,8 +1642,8 @@ xlog_recover_reorder_trans(
 */
 STATIC int
 xlog_recover_buffer_pass1(
-        struct log              *log,
+        struct xlog                     *log,
-        xlog_recover_item_t     *item)
+        struct xlog_recover_item        *item)
 {
        xfs_buf_log_format_t    *buf_f = item->ri_buf[0].i_addr;
        struct list_head        *bucket;
@@ -1696,7 +1696,7 @@ xlog_recover_buffer_pass1(
 */
 STATIC int
 xlog_check_buffer_cancelled(
-        struct log              *log,
+        struct xlog             *log,
        xfs_daddr_t             blkno,
        uint                    len,
        ushort                  flags)
@@ -2689,9 +2689,9 @@ xlog_recover_free_trans(
 STATIC int
 xlog_recover_commit_pass1(
-        struct log              *log,
+        struct xlog                     *log,
-        struct xlog_recover     *trans,
+        struct xlog_recover             *trans,
-        xlog_recover_item_t     *item)
+        struct xlog_recover_item        *item)
 {
        trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1);
@@ -2716,10 +2716,10 @@ xlog_recover_commit_pass1(
 STATIC int
 xlog_recover_commit_pass2(
-        struct log              *log,
+        struct xlog                     *log,
-        struct xlog_recover     *trans,
+        struct xlog_recover             *trans,
-        struct list_head        *buffer_list,
+        struct list_head                *buffer_list,
-        xlog_recover_item_t     *item)
+        struct xlog_recover_item        *item)
 {
        trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2);
@@ -2753,7 +2753,7 @@ xlog_recover_commit_pass2(
 */
 STATIC int
 xlog_recover_commit_trans(
-        struct log              *log,
+        struct xlog             *log,
        struct xlog_recover     *trans,
        int                     pass)
 {
@@ -2793,8 +2793,8 @@ out:
 STATIC int
 xlog_recover_unmount_trans(
-        struct log              *log,
+        struct xlog             *log,
-        xlog_recover_t          *trans)
+        struct xlog_recover     *trans)
 {
        /* Do nothing now */
        xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8b89c5ac72d9..90c1fc9eaea4 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,7 @@ typedef struct xfs_trans_reservations {
 #include "xfs_sync.h"
-struct log;
+struct xlog;
 struct xfs_mount_args;
 struct xfs_inode;
 struct xfs_bmbt_irec;
@@ -133,7 +133,7 @@ typedef struct xfs_mount {
        uint                    m_readio_blocks; /* min read size blocks */
        uint                    m_writeio_log;  /* min write size log bytes */
        uint                    m_writeio_blocks; /* min write size blocks */
-        struct log              *m_log;         /* log specific stuff */
+        struct xlog             *m_log;         /* log specific stuff */
        int                     m_logbufs;      /* number of log buffers */
        int                     m_logbsize;     /* size of each log buffer */
        uint                    m_rsumlevels;   /* rt summary levels */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index c9d3409c5ca3..1e9ee064dbb2 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -386,23 +386,23 @@ xfs_sync_worker(
         * We shouldn't write/force the log if we are in the mount/unmount
         * process or on a read only filesystem. The workqueue still needs to be
         * active in both cases, however, because it is used for inode reclaim
-         * during these times.  Use the s_umount semaphore to provide exclusion
+         * during these times.  Use the MS_ACTIVE flag to avoid doing anything
-         * with unmount.
+         * during mount.  Doing work during unmount is avoided by calling
+         * cancel_delayed_work_sync on this work queue before tearing down
+         * the ail and the log in xfs_log_unmount.
         */
-        if (down_read_trylock(&mp->m_super->s_umount)) {
+        if (!(mp->m_super->s_flags & MS_ACTIVE) &&
-                if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
-                        /* dgc: errors ignored here */
+                /* dgc: errors ignored here */
-                        if (mp->m_super->s_frozen == SB_UNFROZEN &&
+                if (mp->m_super->s_frozen == SB_UNFROZEN &&
-                            xfs_log_need_covered(mp))
+                    xfs_log_need_covered(mp))
-                                error = xfs_fs_log_dummy(mp);
+                        error = xfs_fs_log_dummy(mp);
-                        else
+                else
-                                xfs_log_force(mp, 0);
+                        xfs_log_force(mp, 0);
-                        /* start pushing all the metadata that is currently
+                /* start pushing all the metadata that is currently
-                         * dirty */
+                 * dirty */
-                        xfs_ail_push_all(mp->m_ail);
+                xfs_ail_push_all(mp->m_ail);
-                }
-                up_read(&mp->m_super->s_umount);
        }
        /* queue us up again */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7cf9d3529e51..caf5dabfd553 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -32,7 +32,7 @@ struct xfs_da_node_entry;
 struct xfs_dquot;
 struct xfs_log_item;
 struct xlog_ticket;
-struct log;
+struct xlog;
 struct xlog_recover;
 struct xlog_recover_item;
 struct xfs_buf_log_format;
@@ -762,7 +762,7 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_force);
 DEFINE_DQUOT_EVENT(xfs_dqflush_done);
 DECLARE_EVENT_CLASS(xfs_loggrant_class,
-        TP_PROTO(struct log *log, struct xlog_ticket *tic),
+        TP_PROTO(struct xlog *log, struct xlog_ticket *tic),
        TP_ARGS(log, tic),
        TP_STRUCT__entry(
                __field(dev_t, dev)
@@ -830,7 +830,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
 #define DEFINE_LOGGRANT_EVENT(name) \
 DEFINE_EVENT(xfs_loggrant_class, name, \
-        TP_PROTO(struct log *log, struct xlog_ticket *tic), \
+        TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \
        TP_ARGS(log, tic))
 DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
 DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
@@ -1664,7 +1664,7 @@ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
 DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
-        TP_PROTO(struct log *log, struct xlog_recover *trans,
+        TP_PROTO(struct xlog *log, struct xlog_recover *trans,
                struct xlog_recover_item *item, int pass),
        TP_ARGS(log, trans, item, pass),
        TP_STRUCT__entry(
@@ -1698,7 +1698,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
 #define DEFINE_LOG_RECOVER_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_item_class, name, \
-        TP_PROTO(struct log *log, struct xlog_recover *trans, \
+        TP_PROTO(struct xlog *log, struct xlog_recover *trans, \
                struct xlog_recover_item *item, int pass), \
        TP_ARGS(log, trans, item, pass))
@@ -1709,7 +1709,7 @@ DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
 DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
 DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
-        TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+        TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f),
        TP_ARGS(log, buf_f),
        TP_STRUCT__entry(
                __field(dev_t, dev)
@@ -1739,7 +1739,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
 #define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
-        TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+        TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), \
        TP_ARGS(log, buf_f))
 DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
@@ -1752,7 +1752,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
 DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
 DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
-        TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+        TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f),
        TP_ARGS(log, in_f),
        TP_STRUCT__entry(
                __field(dev_t, dev)
@@ -1790,7 +1790,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
 )
 #define DEFINE_LOG_RECOVER_INO_ITEM(name) \
 DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
-        TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+        TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), \
        TP_ARGS(log, in_f))
 DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);