47 files changed, 652 insertions, 206 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aea605c98ba6..aae187a7f94a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -551,6 +551,7 @@ struct block_device *bdgrab(struct block_device *bdev)
        ihold(bdev->bd_inode);
        return bdev;
 }
+EXPORT_SYMBOL(bdgrab);
 long nr_blockdev_pages(void)
 {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ecd25a1b4e51..ca9d8f1a3bb6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        if (tree_mod_dont_log(fs_info, NULL))
                return 0;
+        __tree_mod_log_free_eb(fs_info, old_root);
        ret = tree_mod_alloc(fs_info, flags, &tm);
        if (ret < 0)
                goto out;
@@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 static noinline void
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
                     struct extent_buffer *src, unsigned long dst_offset,
-                     unsigned long src_offset, int nr_items)
+                     unsigned long src_offset, int nr_items, int log_removal)
 {
        int ret;
        int i;
@@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
        }
        for (i = 0; i < nr_items; i++) {
-                ret = tree_mod_log_insert_key_locked(fs_info, src,
+                if (log_removal) {
-                                                     i + src_offset,
+                        ret = tree_mod_log_insert_key_locked(fs_info, src,
-                                                     MOD_LOG_KEY_REMOVE);
+                                                        i + src_offset,
-                BUG_ON(ret < 0);
+                                                        MOD_LOG_KEY_REMOVE);
+                        BUG_ON(ret < 0);
+                }
                ret = tree_mod_log_insert_key_locked(fs_info, dst,
                                                     i + dst_offset,
                                                     MOD_LOG_KEY_ADD);
@@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-                tree_mod_log_free_eb(root->fs_info, buf);
                clean_tree_block(trans, root, buf);
                *last_ref = 1;
        }
@@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
+                tree_mod_log_free_eb(root->fs_info, buf);
                btrfs_free_tree_block(trans, root, buf, parent_start,
                                      last_ref);
        }
@@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        goto enospc;
                }
-                tree_mod_log_free_eb(root->fs_info, root->node);
                tree_mod_log_set_root_pointer(root, child);
                rcu_assign_pointer(root->node, child);
@@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
                push_items = min(src_nritems - 8, push_items);
        tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
-                             push_items);
+                             push_items, 1);
        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(dst_nritems),
                           btrfs_node_key_ptr_offset(0),
@@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
                                      sizeof(struct btrfs_key_ptr));
        tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
-                             src_nritems - push_items, push_items);
+                             src_nritems - push_items, push_items, 1);
        copy_extent_buffer(dst, src,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        int mid;
        int ret;
        u32 c_nritems;
+        int tree_mod_log_removal = 1;
        c = path->nodes[level];
        WARN_ON(btrfs_header_generation(c) != trans->transid);
        if (c == root->node) {
                /* trying to split the root, lets make a new one */
                ret = insert_new_root(trans, root, path, level + 1);
+                /*
+                 * removal of root nodes has been logged by
+                 * tree_mod_log_set_root_pointer due to locking
+                 */
+                tree_mod_log_removal = 0;
                if (ret)
                        return ret;
        } else {
@@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
                            (unsigned long)btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
-        tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+        tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid,
+                             tree_mod_log_removal);
        copy_extent_buffer(split, c,
                           btrfs_node_key_ptr_offset(0),
                           btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7d84651e850b..6d19a0a554aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
                                      0, objectid, NULL, 0, 0, 0);
        if (IS_ERR(leaf)) {
                ret = PTR_ERR(leaf);
+                leaf = NULL;
                goto fail;
        }
@@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        btrfs_tree_unlock(leaf);
+        return root;
 fail:
-        if (ret)
+        if (leaf) {
-                return ERR_PTR(ret);
+                btrfs_tree_unlock(leaf);
+                free_extent_buffer(leaf);
+        }
+        kfree(root);
-        return root;
+        return ERR_PTR(ret);
 }
 static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
-        if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
                btrfs_free_log(NULL, root);
                btrfs_free_log_root_tree(NULL, fs_info);
        }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9ac2eca681eb..3d551231caba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
                cache->bytes_super += stripe_len;
                ret = add_excluded_extent(root, cache->key.objectid,
                                          stripe_len);
-                BUG_ON(ret); /* -ENOMEM */
+                if (ret)
+                        return ret;
        }
        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root,
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr,
                                       0, &logical, &nr, &stripe_len);
-                BUG_ON(ret); /* -ENOMEM */
+                if (ret)
+                        return ret;
                while (nr--) {
                        cache->bytes_super += stripe_len;
                        ret = add_excluded_extent(root, logical[nr],
                                                  stripe_len);
-                        BUG_ON(ret); /* -ENOMEM */
+                        if (ret) {
+                                kfree(logical);
+                                return ret;
+                        }
                }
                kfree(logical);
@@ -4438,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
        spin_lock(&sinfo->lock);
        spin_lock(&block_rsv->lock);
-        block_rsv->size = num_bytes;
+        block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
        num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
                    sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -4793,14 +4798,49 @@ out_fail:
         * If the inodes csum_bytes is the same as the original
         * csum_bytes then we know we haven't raced with any free()ers
         * so we can just reduce our inodes csum bytes and carry on.
-         * Otherwise we have to do the normal free thing to account for
-         * the case that the free side didn't free up its reserve
-         * because of this outstanding reservation.
         */
-        if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+        if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
                calc_csum_metadata_size(inode, num_bytes, 0);
-        else
+        } else {
-                to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+                u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
+                u64 bytes;
+                /*
+                 * This is tricky, but first we need to figure out how much we
+                 * free'd from any free-ers that occurred during this
+                 * reservation, so we reset ->csum_bytes to the csum_bytes
+                 * before we dropped our lock, and then call the free for the
+                 * number of bytes that were freed while we were trying our
+                 * reservation.
+                 */
+                bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
+                BTRFS_I(inode)->csum_bytes = csum_bytes;
+                to_free = calc_csum_metadata_size(inode, bytes, 0);
+                /*
+                 * Now we need to see how much we would have freed had we not
+                 * been making this reservation and our ->csum_bytes were not
+                 * artificially inflated.
+                 */
+                BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
+                bytes = csum_bytes - orig_csum_bytes;
+                bytes = calc_csum_metadata_size(inode, bytes, 0);
+                /*
+                 * Now reset ->csum_bytes to what it should be.  If bytes is
+                 * more than to_free then we would have free'd more space had we
+                 * not had an artificially high ->csum_bytes, so we need to free
+                 * the remainder.  If bytes is the same or less then we don't
+                 * need to do anything, the other free-ers did the correct
+                 * thing.
+                 */
+                BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
+                if (bytes > to_free)
+                        to_free = bytes - to_free;
+                else
+                        to_free = 0;
+        }
        spin_unlock(&BTRFS_I(inode)->lock);
        if (dropped)
                to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -7947,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * info has super bytes accounted for, otherwise we'll think
                 * we have more space than we actually do.
                 */
-                exclude_super_stripes(root, cache);
+                ret = exclude_super_stripes(root, cache);
+                if (ret) {
+                        /*
+                         * We may have excluded something, so call this just in
+                         * case.
+                         */
+                        free_excluded_extents(root, cache);
+                        kfree(cache->free_space_ctl);
+                        kfree(cache);
+                        goto error;
+                }
                /*
                 * check for two cases, either we are full, and therefore
@@ -8089,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
-        exclude_super_stripes(root, cache);
+        ret = exclude_super_stripes(root, cache);
+        if (ret) {
+                /*
+                 * We may have excluded something, so call this just in
+                 * case.
+                 */
+                free_excluded_extents(root, cache);
+                kfree(cache->free_space_ctl);
+                kfree(cache);
+                return ret;
+        }
        add_new_free_space(cache, root->fs_info, chunk_offset,
                           chunk_offset + size);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..cdee391fc7bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
                                GFP_NOFS);
 }
+/*
+ * Walk the page cache indexes covering bytes [start, end] of the inode's
+ * mapping and call clear_page_dirty_for_io() on each page found there.
+ * Every page is looked up with find_get_page() and that reference is
+ * dropped again before the next index is visited.  A missing page trips
+ * BUG_ON().  Always returns 0.
+ */
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+        unsigned long last = end >> PAGE_CACHE_SHIFT;
+        unsigned long cur;
+        for (cur = start >> PAGE_CACHE_SHIFT; cur <= last; cur++) {
+                struct page *pg = find_get_page(inode->i_mapping, cur);
+                BUG_ON(!pg); /* Pages should be in the extent_io_tree */
+                clear_page_dirty_for_io(pg);
+                page_cache_release(pg);
+        }
+        return 0;
+}
+/*
+ * Walk the page cache indexes covering bytes [start, end] of the inode's
+ * mapping and mark each page dirty again: account_page_redirty() is called
+ * first, then __set_page_dirty_nobuffers().  Every page is looked up with
+ * find_get_page() and that reference is dropped before the next index is
+ * visited.  A missing page trips BUG_ON().  Always returns 0.
+ */
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+        unsigned long last = end >> PAGE_CACHE_SHIFT;
+        unsigned long cur;
+        for (cur = start >> PAGE_CACHE_SHIFT; cur <= last; cur++) {
+                struct page *pg = find_get_page(inode->i_mapping, cur);
+                BUG_ON(!pg); /* Pages should be in the extent_io_tree */
+                account_page_redirty(pg);
+                __set_page_dirty_nobuffers(pg);
+                page_cache_release(pg);
+        }
+        return 0;
+}
 /*
 * helper function to set both pages and extents in the tree writeback
 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..258c92156857 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long *map_len);
 int extent_range_uptodate(struct extent_io_tree *tree,
                          u64 start, u64 end);
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_clear_unlock_delalloc(struct inode *inode,
                                struct extent_io_tree *tree,
                                u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ec160202be3e..c4628a201cb3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
                csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
                csums_in_item /= csum_size;
-                if (csum_offset >= csums_in_item) {
+                if (csum_offset == csums_in_item) {
                        ret = -EFBIG;
                        goto fail;
+                } else if (csum_offset > csums_in_item) {
+                        goto fail;
                }
        }
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        sector_sum = sums->sums;
-        trans->adding_csums = 1;
 again:
        next_offset = (u64)-1;
        found_next = 0;
@@ -899,7 +900,6 @@ next_sector:
                goto again;
        }
 out:
-        trans->adding_csums = 0;
        btrfs_free_path(path);
        return ret;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5b4ea5f55b8f..ade03e6f7bd2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2142,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 {
        struct inode *inode = file_inode(file);
        struct extent_state *cached_state = NULL;
+        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 cur_offset;
        u64 last_byte;
        u64 alloc_start;
@@ -2169,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode,
        ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
        if (ret)
                return ret;
+        if (root->fs_info->quota_enabled) {
+                ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
+                if (ret)
+                        goto out_reserve_fail;
+        }
        /*
         * wait for ordered IO before we have any locks.  We'll loop again
@@ -2272,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode,
                             &cached_state, GFP_NOFS);
 out:
        mutex_unlock(&inode->i_mutex);
+        if (root->fs_info->quota_enabled)
+                btrfs_qgroup_free(root, alloc_end - alloc_start);
+out_reserve_fail:
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
        return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca1b767d51f7..09c58a35b429 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
        int i;
        int will_compress;
        int compress_type = root->fs_info->compress_type;
+        int redirty = 0;
        /* if this is a small write inside eof, kick off a defrag */
        if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
                if (BTRFS_I(inode)->force_compress)
                        compress_type = BTRFS_I(inode)->force_compress;
+                /*
+                 * we need to call clear_page_dirty_for_io on each
+                 * page in the range.  Otherwise applications with the file
+                 * mmap'd can wander in and change the page contents while
+                 * we are compressing them.
+                 *
+                 * If the compression fails for any reason, we set the pages
+                 * dirty again later on.
+                 */
+                extent_range_clear_dirty_for_io(inode, start, end);
+                redirty = 1;
                ret = btrfs_compress_pages(compress_type,
                                           inode->i_mapping, start,
                                           total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
                        __set_page_dirty_nobuffers(locked_page);
                        /* unlocked later on in the async handlers */
                }
+                if (redirty)
+                        extent_range_redirty_for_io(inode, start, end);
                add_async_extent(async_cow, start, end - start + 1,
                                 0, NULL, 0, BTRFS_COMPRESS_NONE);
                *num_added += 1;
@@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
        struct btrfs_ordered_sum *sum;
        list_for_each_entry(sum, list, list) {
+                trans->adding_csums = 1;
                btrfs_csum_file_blocks(trans,
                       BTRFS_I(inode)->root->fs_info->csum_root, sum);
+                trans->adding_csums = 0;
        }
        return 0;
 }
@@ -3679,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
         * 1 for the dir item
         * 1 for the dir index
         * 1 for the inode ref
-         * 1 for the inode ref in the tree log
-         * 2 for the dir entries in the log
         * 1 for the inode
         */
-        trans = btrfs_start_transaction(root, 8);
+        trans = btrfs_start_transaction(root, 5);
        if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
                return trans;
@@ -8127,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         * inodes.  So 5 * 2 is 10, plus 1 for the new link, so 11 total items
         * should cover the worst case number of items we'll modify.
         */
-        trans = btrfs_start_transaction(root, 20);
+        trans = btrfs_start_transaction(root, 11);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                goto out_notrans;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dc08d77b717e..005c45db699e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
        INIT_LIST_HEAD(&splice);
        INIT_LIST_HEAD(&works);
+        mutex_lock(&root->fs_info->ordered_operations_mutex);
        spin_lock(&root->fs_info->ordered_extent_lock);
        list_splice_init(&root->fs_info->ordered_extents, &splice);
        while (!list_empty(&splice)) {
@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
                cond_resched();
        }
+        mutex_unlock(&root->fs_info->ordered_operations_mutex);
 }
 /*
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5471e47d6559..b44124dd2370 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
                                   sgn > 0 ? node->seq - 1 : node->seq, &roots);
        if (ret < 0)
-                goto out;
+                return ret;
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
@@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        ret = 0;
 unlock:
        spin_unlock(&fs_info->qgroup_lock);
-out:
        ulist_free(roots);
        ulist_free(tmp);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 53c3501fa4ca..85e072b956d5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
        eb = path->nodes[0];
        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
        item_size = btrfs_item_size_nr(eb, path->slots[0]);
-        btrfs_release_path(path);
        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                do {
@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
                                ret < 0 ? -1 : ref_level,
                                ret < 0 ? -1 : ref_root);
                } while (ret != 1);
+                btrfs_release_path(path);
        } else {
+                btrfs_release_path(path);
                swarn.path = path;
                swarn.dev = dev;
                iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7a8b861058b..c85e7c6b4598 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
                    found_key.type != key.type) {
                        key.offset += right_len;
                        break;
-                } else {
+                }
-                        if (found_key.offset != key.offset + right_len) {
+                if (found_key.offset != key.offset + right_len) {
-                                /* Should really not happen */
+                        ret = 0;
-                                ret = -EIO;
+                        goto out;
-                                goto out;
-                        }
                }
                key = found_key;
        }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 451fad96ecd1..ef96381569a4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -317,6 +317,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
        unsigned long src_ptr;
        unsigned long dst_ptr;
        int overwrite_root = 0;
+        bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                overwrite_root = 1;
@@ -326,6 +327,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
        /* look for the key in the destination tree */
        ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+        if (ret < 0)
+                return ret;
        if (ret == 0) {
                char *src_copy;
                char *dst_copy;
@@ -367,6 +371,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
                        return 0;
                }
+                /*
+                 * We need to load the old nbytes into the inode so when we
+                 * replay the extents we've logged we get the right nbytes.
+                 */
+                if (inode_item) {
+                        struct btrfs_inode_item *item;
+                        u64 nbytes;
+                        item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                              struct btrfs_inode_item);
+                        nbytes = btrfs_inode_nbytes(path->nodes[0], item);
+                        item = btrfs_item_ptr(eb, slot,
+                                              struct btrfs_inode_item);
+                        btrfs_set_inode_nbytes(eb, item, nbytes);
+                }
+        } else if (inode_item) {
+                struct btrfs_inode_item *item;
+                /*
+                 * New inode, set nbytes to 0 so that the nbytes comes out
+                 * properly when we replay the extents.
+                 */
+                item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
+                btrfs_set_inode_nbytes(eb, item, 0);
        }
 insert:
        btrfs_release_path(path);
@@ -486,7 +514,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        int found_type;
        u64 extent_end;
        u64 start = key->offset;
-        u64 saved_nbytes;
+        u64 nbytes = 0;
        struct btrfs_file_extent_item *item;
        struct inode *inode = NULL;
        unsigned long size;
@@ -496,10 +524,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        found_type = btrfs_file_extent_type(eb, item);
        if (found_type == BTRFS_FILE_EXTENT_REG ||
-            found_type == BTRFS_FILE_EXTENT_PREALLOC)
+            found_type == BTRFS_FILE_EXTENT_PREALLOC) {
-                extent_end = start + btrfs_file_extent_num_bytes(eb, item);
+                nbytes = btrfs_file_extent_num_bytes(eb, item);
-        else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+                extent_end = start + nbytes;
+                /*
+                 * We don't add to the inodes nbytes if we are prealloc or a
+                 * hole.
+                 */
+                if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
+                        nbytes = 0;
+        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                size = btrfs_file_extent_inline_len(eb, item);
+                nbytes = btrfs_file_extent_ram_bytes(eb, item);
                extent_end = ALIGN(start + size, root->sectorsize);
        } else {
                ret = 0;
@@ -548,7 +585,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(path);
-        saved_nbytes = inode_get_bytes(inode);
        /* drop any overlapping extents */
        ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
        BUG_ON(ret);
@@ -635,7 +671,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                BUG_ON(ret);
        }
-        inode_set_bytes(inode, saved_nbytes);
+        inode_add_bytes(inode, nbytes);
        ret = btrfs_update_inode(trans, root, inode);
 out:
        if (inode)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5989a92236f7..2854c824ab64 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4935,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        em = lookup_extent_mapping(em_tree, chunk_start, 1);
        read_unlock(&em_tree->lock);
-        BUG_ON(!em || em->start != chunk_start);
+        if (!em) {
+                printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
+                       chunk_start);
+                return -EIO;
+        }
+        if (em->start != chunk_start) {
+                printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
+                       em->start, chunk_start);
+                free_extent_map(em);
+                return -EIO;
+        }
        map = (struct map_lookup *)em->bdev;
        length = em->len;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 991c63c6bdd0..21b3a291c327 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1575,14 +1575,24 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                        }
                        break;
                case Opt_blank_pass:
-                        vol->password = NULL;
-                        break;
-                case Opt_pass:
                        /* passwords have to be handled differently
                         * to allow the character used for deliminator
                         * to be passed within them
                         */
+                        /*
+                         * Check if this is a case where the password
+                         * starts with a delimiter
+                         */
+                        tmp_end = strchr(data, '=');
+                        tmp_end++;
+                        if (!(tmp_end < end && tmp_end[1] == delim)) {
+                                /* No it is not. Set the password to NULL */
+                                vol->password = NULL;
+                                break;
+                        }
+                        /* Yes it is. Drop down to Opt_pass below.*/
+                case Opt_pass:
                        /* Obtain the value string */
                        value = strchr(data, '=');
                        value++;
diff --git a/fs/dcache.c b/fs/dcache.c
index fbfae008ba44..e8bc3420d63e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path,
        bool slash = false;
        int error = 0;
-        br_read_lock(&vfsmount_lock);
        while (dentry != root->dentry || vfsmnt != root->mnt) {
                struct dentry * parent;
@@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path,
        if (!error && !slash)
                error = prepend(buffer, buflen, "/", 1);
-out:
-        br_read_unlock(&vfsmount_lock);
        return error;
 global_root:
@@ -2590,7 +2587,7 @@ global_root:
                error = prepend(buffer, buflen, "/", 1);
        if (!error)
                error = is_mounted(vfsmnt) ? 1 : 2;
-        goto out;
+        return error;
 }
 /**
@@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path,
        int error;
        prepend(&res, &buflen, "\0", 1);
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        error = prepend_path(path, root, &res, &buflen);
        write_sequnlock(&rename_lock);
+        br_read_unlock(&vfsmount_lock);
        if (error < 0)
                return ERR_PTR(error);
@@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path,
        int error;
        prepend(&res, &buflen, "\0", 1);
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        error = prepend_path(path, &root, &res, &buflen);
        write_sequnlock(&rename_lock);
+        br_read_unlock(&vfsmount_lock);
        if (error > 1)
                error = -EINVAL;
@@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
        get_fs_root(current->fs, &root);
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        error = path_with_deleted(path, &root, &res, &buflen);
+        write_sequnlock(&rename_lock);
+        br_read_unlock(&vfsmount_lock);
        if (error < 0)
                res = ERR_PTR(error);
-        write_sequnlock(&rename_lock);
        path_put(&root);
        return res;
 }
@@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
        get_fs_root_and_pwd(current->fs, &root, &pwd);
        error = -ENOENT;
+        br_read_lock(&vfsmount_lock);
        write_seqlock(&rename_lock);
        if (!d_unlinked(pwd.dentry)) {
                unsigned long len;
@@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                prepend(&cwd, &buflen, "\0", 1);
                error = prepend_path(&pwd, &root, &cwd, &buflen);
                write_sequnlock(&rename_lock);
+                br_read_unlock(&vfsmount_lock);
                if (error < 0)
                        goto out;
@@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                }
        } else {
                write_sequnlock(&rename_lock);
+                br_read_unlock(&vfsmount_lock);
        }
 out:
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 412e6eda25f8..e4141f257495 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -80,13 +80,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
        int rc;
        mutex_lock(&ecryptfs_daemon_hash_mux);
-        rc = try_module_get(THIS_MODULE);
-        if (rc == 0) {
-                rc = -EIO;
-                printk(KERN_ERR "%s: Error attempting to increment module use "
-                       "count; rc = [%d]\n", __func__, rc);
-                goto out_unlock_daemon_list;
-        }
        rc = ecryptfs_find_daemon_by_euid(&daemon);
        if (!rc) {
                rc = -EINVAL;
@@ -96,7 +89,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
        if (rc) {
                printk(KERN_ERR "%s: Error attempting to spawn daemon; "
                       "rc = [%d]\n", __func__, rc);
-                goto out_module_put_unlock_daemon_list;
+                goto out_unlock_daemon_list;
        }
        mutex_lock(&daemon->mux);
        if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
@@ -108,9 +101,6 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
        atomic_inc(&ecryptfs_num_miscdev_opens);
 out_unlock_daemon:
        mutex_unlock(&daemon->mux);
-out_module_put_unlock_daemon_list:
-        if (rc)
-                module_put(THIS_MODULE);
 out_unlock_daemon_list:
        mutex_unlock(&ecryptfs_daemon_hash_mux);
        return rc;
@@ -147,7 +137,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
                       "bug.\n", __func__, rc);
                BUG();
        }
-        module_put(THIS_MODULE);
        return rc;
 }
@@ -471,6 +460,7 @@ out_free:
 static const struct file_operations ecryptfs_miscdev_fops = {
+        .owner   = THIS_MODULE,
        .open    = ecryptfs_miscdev_open,
        .poll    = ecryptfs_miscdev_poll,
        .read    = ecryptfs_miscdev_read,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 56efcaadf848..9c6d06dcef8b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2999,20 +2999,23 @@ static int ext4_split_extent_at(handle_t *handle,
                        if (split_flag & EXT4_EXT_DATA_VALID1) {
                                err = ext4_ext_zeroout(inode, ex2);
                                zero_ex.ee_block = ex2->ee_block;
-                                zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
+                                zero_ex.ee_len = cpu_to_le16(
+                                                ext4_ext_get_actual_len(ex2));
                                ext4_ext_store_pblock(&zero_ex,
                                                      ext4_ext_pblock(ex2));
                        } else {
                                err = ext4_ext_zeroout(inode, ex);
                                zero_ex.ee_block = ex->ee_block;
-                                zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+                                zero_ex.ee_len = cpu_to_le16(
+                                                ext4_ext_get_actual_len(ex));
                                ext4_ext_store_pblock(&zero_ex,
                                                      ext4_ext_pblock(ex));
                        }
                } else {
                        err = ext4_ext_zeroout(inode, &orig_ex);
                        zero_ex.ee_block = orig_ex.ee_block;
-                        zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
+                        zero_ex.ee_len = cpu_to_le16(
+                                                ext4_ext_get_actual_len(&orig_ex));
                        ext4_ext_store_pblock(&zero_ex,
                                              ext4_ext_pblock(&orig_ex));
                }
@@ -3272,7 +3275,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                if (err)
                        goto out;
                zero_ex.ee_block = ex->ee_block;
-                zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+                zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
                ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
                err = ext4_ext_get_access(handle, inode, path + depth);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b505a145a593..a04183127ef0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1539,9 +1539,9 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
                blk = *i_data;
                if (level > 0) {
                        ext4_lblk_t first2;
-                        bh = sb_bread(inode->i_sb, blk);
+                        bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
                        if (!bh) {
-                                EXT4_ERROR_INODE_BLOCK(inode, blk,
+                                EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
                                                       "Read failure");
                                return -EIO;
                        }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 019f45e45097..d79c2dadc536 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -923,8 +923,11 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
                cmd = F_SETLK;
                fl->fl_type = F_UNLCK;
        }
-        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+                if (fl->fl_type == F_UNLCK)
+                        posix_lock_file_wait(file, fl);
                return -EIO;
+        }
        if (IS_GETLK(cmd))
                return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
        else if (fl->fl_type == F_UNLCK)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 156e42ec84ea..5c29216e9cc1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -588,6 +588,7 @@ struct lm_lockstruct {
        struct dlm_lksb ls_control_lksb; /* control_lock */
        char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
        struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
+        char *ls_lvb_bits;
        spinlock_t ls_recover_spin; /* protects following fields */
        unsigned long ls_recover_flags; /* DFL_ */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 9802de0f85e6..c8423d6de6c3 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -483,12 +483,8 @@ static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
 static int all_jid_bits_clear(char *lvb)
 {
-        int i;
+        return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0,
-        for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) {
+                        GDLM_LVB_SIZE - JID_BITMAP_OFFSET);
-                if (lvb[i])
-                        return 0;
-        }
-        return 1;
 }
 static void sync_wait_cb(void *arg)
@@ -580,7 +576,6 @@ static void gfs2_control_func(struct work_struct *work)
 {
        struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-        char lvb_bits[GDLM_LVB_SIZE];
        uint32_t block_gen, start_gen, lvb_gen, flags;
        int recover_set = 0;
        int write_lvb = 0;
@@ -634,7 +629,7 @@ static void gfs2_control_func(struct work_struct *work)
                return;
        }
-        control_lvb_read(ls, &lvb_gen, lvb_bits);
+        control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);
        spin_lock(&ls->ls_recover_spin);
        if (block_gen != ls->ls_recover_block ||
@@ -664,10 +659,10 @@ static void gfs2_control_func(struct work_struct *work)
                        ls->ls_recover_result[i] = 0;
-                        if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET))
+                        if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET))
                                continue;
-                        __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+                        __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
                        write_lvb = 1;
                }
        }
@@ -691,7 +686,7 @@ static void gfs2_control_func(struct work_struct *work)
                                continue;
                        if (ls->ls_recover_submit[i] < start_gen) {
                                ls->ls_recover_submit[i] = 0;
-                                __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+                                __set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
                        }
                }
                /* even if there are no bits to set, we need to write the
@@ -705,7 +700,7 @@ static void gfs2_control_func(struct work_struct *work)
        spin_unlock(&ls->ls_recover_spin);
        if (write_lvb) {
-                control_lvb_write(ls, start_gen, lvb_bits);
+                control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
                flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
        } else {
                flags = DLM_LKF_CONVERT;
@@ -725,7 +720,7 @@ static void gfs2_control_func(struct work_struct *work)
         */
        for (i = 0; i < recover_size; i++) {
-                if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) {
+                if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
                        fs_info(sdp, "recover generation %u jid %d\n",
                                start_gen, i);
                        gfs2_recover_set(sdp, i);
@@ -758,7 +753,6 @@ static void gfs2_control_func(struct work_struct *work)
 static int control_mount(struct gfs2_sbd *sdp)
 {
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-        char lvb_bits[GDLM_LVB_SIZE];
        uint32_t start_gen, block_gen, mount_gen, lvb_gen;
        int mounted_mode;
        int retries = 0;
@@ -857,7 +851,7 @@ locks_done:
         * lvb_gen will be non-zero.
         */
-        control_lvb_read(ls, &lvb_gen, lvb_bits);
+        control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);
        if (lvb_gen == 0xFFFFFFFF) {
                /* special value to force mount attempts to fail */
@@ -887,7 +881,7 @@ locks_done:
         * and all lvb bits to be clear (no pending journal recoveries.)
         */
-        if (!all_jid_bits_clear(lvb_bits)) {
+        if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
                /* journals need recovery, wait until all are clear */
                fs_info(sdp, "control_mount wait for journal recovery\n");
                goto restart;
@@ -949,7 +943,6 @@ static int dlm_recovery_wait(void *word)
 static int control_first_done(struct gfs2_sbd *sdp)
 {
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-        char lvb_bits[GDLM_LVB_SIZE];
        uint32_t start_gen, block_gen;
        int error;
@@ -991,8 +984,8 @@ restart:
        memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
        spin_unlock(&ls->ls_recover_spin);
-        memset(lvb_bits, 0, sizeof(lvb_bits));
+        memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE);
-        control_lvb_write(ls, start_gen, lvb_bits);
+        control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
        error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
        if (error)
@@ -1022,6 +1015,12 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
        uint32_t old_size, new_size;
        int i, max_jid;
+        if (!ls->ls_lvb_bits) {
+                ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
+                if (!ls->ls_lvb_bits)
+                        return -ENOMEM;
+        }
        max_jid = 0;
        for (i = 0; i < num_slots; i++) {
                if (max_jid < slots[i].slot - 1)
@@ -1057,6 +1056,7 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
 static void free_recover_size(struct lm_lockstruct *ls)
 {
+        kfree(ls->ls_lvb_bits);
        kfree(ls->ls_recover_submit);
        kfree(ls->ls_recover_result);
        ls->ls_recover_submit = NULL;
@@ -1205,6 +1205,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
        ls->ls_recover_size = 0;
        ls->ls_recover_submit = NULL;
        ls->ls_recover_result = NULL;
+        ls->ls_lvb_bits = NULL;
        error = set_recover_size(sdp, NULL, 0);
        if (error)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index d1f51fd73f86..5a51265a4341 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -576,7 +576,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
        RB_CLEAR_NODE(&ip->i_res->rs_node);
 out:
        up_write(&ip->i_rw_mutex);
-        return 0;
+        return error;
 }
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1181,12 +1181,9 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                             const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
 {
        struct super_block *sb = sdp->sd_vfs;
-        struct block_device *bdev = sb->s_bdev;
-        const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
-                                           bdev_logical_block_size(sb->s_bdev);
        u64 blk;
        sector_t start = 0;
-        sector_t nr_sects = 0;
+        sector_t nr_blks = 0;
        int rv;
        unsigned int x;
        u32 trimmed = 0;
@@ -1206,35 +1203,34 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
                if (diff == 0)
                        continue;
                blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
-                blk *= sects_per_blk; /* convert to sectors */
                while(diff) {
                        if (diff & 1) {
-                                if (nr_sects == 0)
+                                if (nr_blks == 0)
                                        goto start_new_extent;
-                                if ((start + nr_sects) != blk) {
+                                if ((start + nr_blks) != blk) {
-                                        if (nr_sects >= minlen) {
+                                        if (nr_blks >= minlen) {
-                                                rv = blkdev_issue_discard(bdev,
+                                                rv = sb_issue_discard(sb,
-                                                        start, nr_sects,
+                                                        start, nr_blks,
                                                        GFP_NOFS, 0);
                                                if (rv)
                                                        goto fail;
-                                                trimmed += nr_sects;
+                                                trimmed += nr_blks;
                                        }
-                                        nr_sects = 0;
+                                        nr_blks = 0;
 start_new_extent:
                                        start = blk;
                                }
-                                nr_sects += sects_per_blk;
+                                nr_blks++;
                        }
                        diff >>= 2;
-                        blk += sects_per_blk;
+                        blk++;
                }
        }
-        if (nr_sects >= minlen) {
+        if (nr_blks >= minlen) {
-                rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
+                rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
                if (rv)
                        goto fail;
-                trimmed += nr_sects;
+                trimmed += nr_blks;
        }
        if (ptrimmed)
                *ptrimmed = trimmed;
diff --git a/fs/inode.c b/fs/inode.c
index f5f7c06c36fb..a898b3d43ccf 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -725,7 +725,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan)
                 * inode to the back of the list so we don't spin on it.
                 */
                if (!spin_trylock(&inode->i_lock)) {
-                        list_move_tail(&inode->i_lru, &sb->s_inode_lru);
+                        list_move(&inode->i_lru, &sb->s_inode_lru);
                        continue;
                }
diff --git a/fs/internal.h b/fs/internal.h
index 507141fceb99..4be78237d896 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool);
 * dcache.c
 */
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
+/*
+ * read_write.c
+ */
+extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ca17d3cb45..341d3f564082 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
        }
        mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+        /* Don't allow unprivileged users to change mount flags */
+        if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
+                mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
        atomic_inc(&sb->s_active);
        mnt->mnt.mnt_sb = sb;
        mnt->mnt.mnt_root = dget(root);
@@ -1686,7 +1690,7 @@ static int do_loopback(struct path *path, const char *old_name,
        if (IS_ERR(mnt)) {
                err = PTR_ERR(mnt);
-                goto out;
+                goto out2;
        }
        err = graft_tree(mnt, path);
@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
        if (readonly_request == __mnt_is_readonly(mnt))
                return 0;
+        if (mnt->mnt_flags & MNT_LOCK_READONLY)
+                return -EPERM;
        if (readonly_request)
                error = mnt_make_readonly(real_mount(mnt));
        else
@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        /* First pass: copy the tree topology */
        copy_flags = CL_COPY_ALL | CL_EXPIRE;
        if (user_ns != mnt_ns->user_ns)
-                copy_flags |= CL_SHARED_TO_SLAVE;
+                copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
        new = copy_tree(old, old->mnt.mnt_root, copy_flags);
        if (IS_ERR(new)) {
                up_write(&namespace_sem);
@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
        return check_mnt(real_mount(mnt));
 }
+bool current_chrooted(void)
+{
+        /* Does the current process have a non-standard root */
+        struct path ns_root;
+        struct path fs_root;
+        bool chrooted;
+        /* Find the namespace root */
+        ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
+        ns_root.dentry = ns_root.mnt->mnt_root;
+        path_get(&ns_root);
+        while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
+                ;
+        get_fs_root(current->fs, &fs_root);
+        chrooted = !path_equal(&fs_root, &ns_root);
+        path_put(&fs_root);
+        path_put(&ns_root);
+        return chrooted;
+}
+void update_mnt_policy(struct user_namespace *userns)
+{
+        struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+        struct mount *mnt;
+        down_read(&namespace_sem);
+        list_for_each_entry(mnt, &ns->list, mnt_list) {
+                switch (mnt->mnt.mnt_sb->s_magic) {
+                case SYSFS_MAGIC:
+                        userns->may_mount_sysfs = true;
+                        break;
+                case PROC_SUPER_MAGIC:
+                        userns->may_mount_proc = true;
+                        break;
+                }
+                if (userns->may_mount_sysfs && userns->may_mount_proc)
+                        break;
+        }
+        up_read(&namespace_sem);
+}
 static void *mntns_get(struct task_struct *task)
 {
        struct mnt_namespace *ns = NULL;
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17b..6fc7b5cae92b 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
        bl_pipe_msg.bl_wq = &nn->bl_wq;
        memset(msg, 0, sizeof(*msg));
-        msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS);
+        msg->len = sizeof(bl_msg) + bl_msg.totallen;
+        msg->data = kzalloc(msg->len, GFP_NOFS);
        if (!msg->data)
                goto out;
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
        memcpy(msg->data, &bl_msg, sizeof(bl_msg));
        dataptr = (uint8_t *) msg->data;
        memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
-        msg->len = sizeof(bl_msg) + bl_msg.totallen;
        add_wait_queue(&nn->bl_wq, &wq);
        if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dc0f98dfa717..c516da5873fd 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -726,9 +726,9 @@ out1:
        return ret;
 }
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data)
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
 {
-        return key_instantiate_and_link(key, data, strlen(data) + 1,
+        return key_instantiate_and_link(key, data, datalen,
                                        id_resolver_cache->thread_keyring,
                                        authkey);
 }
@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
                struct key *key, struct key *authkey)
 {
        char id_str[NFS_UINT_MAXLEN];
+        size_t len;
        int ret = -ENOKEY;
        /* ret = -ENOKEY */
@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
        case IDMAP_CONV_NAMETOID:
                if (strcmp(upcall->im_name, im->im_name) != 0)
                        break;
-                sprintf(id_str, "%d", im->im_id);
+                /* Note: here we store the NUL terminator too */
-                ret = nfs_idmap_instantiate(key, authkey, id_str);
+                len = sprintf(id_str, "%d", im->im_id) + 1;
+                ret = nfs_idmap_instantiate(key, authkey, id_str, len);
                break;
        case IDMAP_CONV_IDTONAME:
                if (upcall->im_id != im->im_id)
                        break;
-                ret = nfs_idmap_instantiate(key, authkey, im->im_name);
+                len = strlen(im->im_name);
+                ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
                break;
        default:
                ret = -EINVAL;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index ac4fc9a8fdbc..66b6664dcd4c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -300,7 +300,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
                           struct rpc_cred *cred)
 {
        struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
-        struct nfs_client *pos, *n, *prev = NULL;
+        struct nfs_client *pos, *prev = NULL;
        struct nfs4_setclientid_res clid = {
                .clientid       = new->cl_clientid,
                .confirm        = new->cl_confirm,
@@ -308,10 +308,23 @@ int nfs40_walk_client_list(struct nfs_client *new,
        int status = -NFS4ERR_STALE_CLIENTID;
        spin_lock(&nn->nfs_client_lock);
-        list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
+        list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
                /* If "pos" isn't marked ready, we can't trust the
                 * remaining fields in "pos" */
-                if (pos->cl_cons_state < NFS_CS_READY)
+                if (pos->cl_cons_state > NFS_CS_READY) {
+                        atomic_inc(&pos->cl_count);
+                        spin_unlock(&nn->nfs_client_lock);
+                        if (prev)
+                                nfs_put_client(prev);
+                        prev = pos;
+                        status = nfs_wait_client_init_complete(pos);
+                        spin_lock(&nn->nfs_client_lock);
+                        if (status < 0)
+                                continue;
+                }
+                if (pos->cl_cons_state != NFS_CS_READY)
                        continue;
                if (pos->rpc_ops != new->rpc_ops)
@@ -423,16 +436,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
                           struct rpc_cred *cred)
 {
        struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
-        struct nfs_client *pos, *n, *prev = NULL;
+        struct nfs_client *pos, *prev = NULL;
        int status = -NFS4ERR_STALE_CLIENTID;
        spin_lock(&nn->nfs_client_lock);
-        list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
+        list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
                /* If "pos" isn't marked ready, we can't trust the
                 * remaining fields in "pos", especially the client
                 * ID and serverowner fields.  Wait for CREATE_SESSION
                 * to finish. */
-                if (pos->cl_cons_state < NFS_CS_READY) {
+                if (pos->cl_cons_state > NFS_CS_READY) {
                        atomic_inc(&pos->cl_count);
                        spin_unlock(&nn->nfs_client_lock);
@@ -440,18 +453,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
                                nfs_put_client(prev);
                        prev = pos;
-                        nfs4_schedule_lease_recovery(pos);
                        status = nfs_wait_client_init_complete(pos);
-                        if (status < 0) {
+                        if (status == 0) {
-                                nfs_put_client(pos);
+                                nfs4_schedule_lease_recovery(pos);
-                                spin_lock(&nn->nfs_client_lock);
+                                status = nfs4_wait_clnt_recover(pos);
-                                continue;
                        }
-                        status = pos->cl_cons_state;
                        spin_lock(&nn->nfs_client_lock);
                        if (status < 0)
                                continue;
                }
+                if (pos->cl_cons_state != NFS_CS_READY)
+                        continue;
                if (pos->rpc_ops != new->rpc_ops)
                        continue;
@@ -469,17 +481,18 @@ int nfs41_walk_client_list(struct nfs_client *new,
                        continue;
                atomic_inc(&pos->cl_count);
-                spin_unlock(&nn->nfs_client_lock);
+                *result = pos;
+                status = 0;
                dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
                        __func__, pos, atomic_read(&pos->cl_count));
+                break;
-                *result = pos;
-                return 0;
        }
        /* No matching nfs_client found. */
        spin_unlock(&nn->nfs_client_lock);
        dprintk("NFS: <-- %s status = %d\n", __func__, status);
+        if (prev)
+                nfs_put_client(prev);
        return status;
 }
 #endif  /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb044c109..4fb234d3aefb 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
 {
        if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
                return;
-        clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
        pnfs_return_layout(inode);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb0f901..0ad025eb523b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1046,6 +1046,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
                /* Save the delegation */
                nfs4_stateid_copy(&stateid, &delegation->stateid);
                rcu_read_unlock();
+                nfs_release_seqid(opendata->o_arg.seqid);
                ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
                if (ret != 0)
                        goto out;
@@ -2632,7 +2633,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        int status;
        if (pnfs_ld_layoutret_on_setattr(inode))
-                pnfs_return_layout(inode);
+                pnfs_commit_and_return_layout(inode);
        nfs_fattr_init(fattr);
        
@@ -6416,22 +6417,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 static void nfs4_layoutcommit_release(void *calldata)
 {
        struct nfs4_layoutcommit_data *data = calldata;
-        struct pnfs_layout_segment *lseg, *tmp;
-        unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
        pnfs_cleanup_layoutcommit(data);
-        /* Matched by references in pnfs_set_layoutcommit */
-        list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
-                list_del_init(&lseg->pls_lc_list);
-                if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
-                                       &lseg->pls_flags))
-                        pnfs_put_lseg(lseg);
-        }
-        clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
-        smp_mb__after_clear_bit();
-        wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
        put_rpccred(data->cred);
        kfree(data);
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6ace365c6334..d41a3518509f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1886,7 +1886,13 @@ again:
                        status = PTR_ERR(clnt);
                        break;
                }
-                clp->cl_rpcclient = clnt;
+                /* Note: this is safe because we haven't yet marked the
+                 * client as ready, so we are the only user of
+                 * clp->cl_rpcclient
+                 */
+                clnt = xchg(&clp->cl_rpcclient, clnt);
+                rpc_shutdown_client(clnt);
+                clnt = clp->cl_rpcclient;
                goto again;
        case -NFS4ERR_MINOR_VERS_MISMATCH:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aad6258..4bdffe0ba025 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
               lo_seg_intersecting(lseg_range, recall_range);
 }
+static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
+                struct list_head *tmp_list)
+{
+        if (!atomic_dec_and_test(&lseg->pls_refcount))
+                return false;
+        pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
+        list_add(&lseg->pls_list, tmp_list);
+        return true;
+}
 /* Returns 1 if lseg is removed from list, 0 otherwise */
 static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
                             struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
                 */
                dprintk("%s: lseg %p ref %d\n", __func__, lseg,
                        atomic_read(&lseg->pls_refcount));
-                if (atomic_dec_and_test(&lseg->pls_refcount)) {
+                if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
-                        pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
-                        list_add(&lseg->pls_list, tmp_list);
                        rv = 1;
-                }
        }
        return rv;
 }
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
        return lseg;
 }
+static void pnfs_clear_layoutcommit(struct inode *inode,
+                struct list_head *head)
+{
+        struct nfs_inode *nfsi = NFS_I(inode);
+        struct pnfs_layout_segment *lseg, *tmp;
+        if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+                return;
+        list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
+                if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+                        continue;
+                pnfs_lseg_dec_and_remove_zero(lseg, head);
+        }
+}
 /*
 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
 * when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
        /* Reference matched in nfs4_layoutreturn_release */
        pnfs_get_layout_hdr(lo);
        empty = list_empty(&lo->plh_segs);
+        pnfs_clear_layoutcommit(ino, &tmp_list);
        pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
        /* Don't send a LAYOUTRETURN if list was initially empty */
        if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
-        WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
        lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
        if (unlikely(lrp == NULL)) {
                status = -ENOMEM;
@@ -845,6 +866,33 @@ out:
 }
 EXPORT_SYMBOL_GPL(_pnfs_return_layout);
+int
+pnfs_commit_and_return_layout(struct inode *inode)
+{
+        struct pnfs_layout_hdr *lo;
+        int ret;
+        spin_lock(&inode->i_lock);
+        lo = NFS_I(inode)->layout;
+        if (lo == NULL) {
+                spin_unlock(&inode->i_lock);
+                return 0;
+        }
+        pnfs_get_layout_hdr(lo);
+        /* Block new layoutgets and read/write to ds */
+        lo->plh_block_lgets++;
+        spin_unlock(&inode->i_lock);
+        filemap_fdatawait(inode->i_mapping);
+        ret = pnfs_layoutcommit_inode(inode, true);
+        if (ret == 0)
+                ret = _pnfs_return_layout(inode);
+        spin_lock(&inode->i_lock);
+        lo->plh_block_lgets--;
+        spin_unlock(&inode->i_lock);
+        pnfs_put_layout_hdr(lo);
+        return ret;
+}
 bool pnfs_roc(struct inode *ino)
 {
        struct pnfs_layout_hdr *lo;
@@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
        dprintk("pnfs write error = %d\n", hdr->pnfs_error);
        if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
            PNFS_LAYOUTRET_ON_ERROR) {
-                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
                pnfs_return_layout(hdr->inode);
        }
        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
        dprintk("pnfs read error = %d\n", hdr->pnfs_error);
        if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
            PNFS_LAYOUTRET_ON_ERROR) {
-                clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
                pnfs_return_layout(hdr->inode);
        }
        if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
        list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
                if (lseg->pls_range.iomode == IOMODE_RW &&
-                    test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+                    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
                        list_add(&lseg->pls_lc_list, listp);
        }
 }
+static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
+{
+        struct pnfs_layout_segment *lseg, *tmp;
+        unsigned long *bitlock = &NFS_I(inode)->flags;
+        /* Matched by references in pnfs_set_layoutcommit */
+        list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
+                list_del_init(&lseg->pls_lc_list);
+                pnfs_put_lseg(lseg);
+        }
+        clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+        smp_mb__after_clear_bit();
+        wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 {
        pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
        if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
                nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+        pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
 }
 /*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba80417748..f5f8a470a647 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
+int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_write_data *);
 void pnfs_ld_read_done(struct nfs_read_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
        return 0;
 }
+static inline int pnfs_commit_and_return_layout(struct inode *inode)
+{
+        return 0;
+}
 static inline bool
 pnfs_ld_layoutret_on_setattr(struct inode *inode)
 {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 01168865dd37..a2720071f282 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -264,7 +264,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                iattr->ia_valid |= ATTR_SIZE;
        }
        if (bmval[0] & FATTR4_WORD0_ACL) {
-                int nace;
+                u32 nace;
                struct nfs4_ace *ace;
                READ_BUF(4); len += 4;
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..8b29d2164da6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -9,6 +9,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
+#include <linux/nsproxy.h>
 #include "internal.h"
 #include "pnode.h"
@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
 int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
                    struct mount *source_mnt, struct list_head *tree_list)
 {
+        struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
        struct mount *m, *child;
        int ret = 0;
        struct mount *prev_dest_mnt = dest_mnt;
@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
                source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
+                /* Notice when we are propagating across user namespaces */
+                if (m->mnt_ns->user_ns != user_ns)
+                        type |= CL_UNPRIVILEGED;
                child = copy_tree(source, source->mnt.mnt_root, type);
                if (IS_ERR(child)) {
                        ret = PTR_ERR(child);
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..a0493d5ebfbf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -23,6 +23,7 @@
 #define CL_MAKE_SHARED          0x08
 #define CL_PRIVATE              0x10
 #define CL_SHARED_TO_SLAVE      0x20
+#define CL_UNPRIVILEGED         0x40
 static inline void set_mnt_shared(struct mount *mnt)
 {
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f7ed9ee46eb9..cbd0f1b324b9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -143,6 +143,7 @@ static const char * const task_state_array[] = {
        "x (dead)",             /*  64 */
        "K (wakekill)",         /* 128 */
        "W (waking)",           /* 256 */
+        "P (parked)",           /* 512 */
 };
 static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4b3b3ffb52f1..21e1a8f1659d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -755,37 +755,8 @@ void pde_put(struct proc_dir_entry *pde)
                free_proc_entry(pde);
 }
-/*
+static void entry_rundown(struct proc_dir_entry *de)
- * Remove a /proc entry and free it if it's not currently in use.
- */
-void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
-        struct proc_dir_entry **p;
-        struct proc_dir_entry *de = NULL;
-        const char *fn = name;
-        unsigned int len;
-        spin_lock(&proc_subdir_lock);
-        if (__xlate_proc_name(name, &parent, &fn) != 0) {
-                spin_unlock(&proc_subdir_lock);
-                return;
-        }
-        len = strlen(fn);
-        for (p = &parent->subdir; *p; p=&(*p)->next ) {
-                if (proc_match(len, fn, *p)) {
-                        de = *p;
-                        *p = de->next;
-                        de->next = NULL;
-                        break;
-                }
-        }
-        spin_unlock(&proc_subdir_lock);
-        if (!de) {
-                WARN(1, "name '%s'\n", name);
-                return;
-        }
        spin_lock(&de->pde_unload_lock);
        /*
         * Stop accepting new callers into module. If you're
@@ -817,6 +788,40 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
                spin_lock(&de->pde_unload_lock);
        }
        spin_unlock(&de->pde_unload_lock);
+}
+/*
+ * Remove a /proc entry and free it if it's not currently in use.
+ */
+void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
+{
+        struct proc_dir_entry **p;
+        struct proc_dir_entry *de = NULL;
+        const char *fn = name;
+        unsigned int len;
+        spin_lock(&proc_subdir_lock);
+        if (__xlate_proc_name(name, &parent, &fn) != 0) {
+                spin_unlock(&proc_subdir_lock);
+                return;
+        }
+        len = strlen(fn);
+        for (p = &parent->subdir; *p; p=&(*p)->next ) {
+                if (proc_match(len, fn, *p)) {
+                        de = *p;
+                        *p = de->next;
+                        de->next = NULL;
+                        break;
+                }
+        }
+        spin_unlock(&proc_subdir_lock);
+        if (!de) {
+                WARN(1, "name '%s'\n", name);
+                return;
+        }
+        entry_rundown(de);
        if (S_ISDIR(de->mode))
                parent->nlink--;
@@ -827,3 +832,57 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
        pde_put(de);
 }
 EXPORT_SYMBOL(remove_proc_entry);
+int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
+{
+        struct proc_dir_entry **p;
+        struct proc_dir_entry *root = NULL, *de, *next;
+        const char *fn = name;
+        unsigned int len;
+        spin_lock(&proc_subdir_lock);
+        if (__xlate_proc_name(name, &parent, &fn) != 0) {
+                spin_unlock(&proc_subdir_lock);
+                return -ENOENT;
+        }
+        len = strlen(fn);
+        for (p = &parent->subdir; *p; p=&(*p)->next ) {
+                if (proc_match(len, fn, *p)) {
+                        root = *p;
+                        *p = root->next;
+                        root->next = NULL;
+                        break;
+                }
+        }
+        if (!root) {
+                spin_unlock(&proc_subdir_lock);
+                return -ENOENT;
+        }
+        de = root;
+        while (1) {
+                next = de->subdir;
+                if (next) {
+                        de->subdir = next->next;
+                        next->next = NULL;
+                        de = next;
+                        continue;
+                }
+                spin_unlock(&proc_subdir_lock);
+                entry_rundown(de);
+                next = de->parent;
+                if (S_ISDIR(de->mode))
+                        next->nlink--;
+                de->nlink = 0;
+                if (de == root)
+                        break;
+                pde_put(de);
+                spin_lock(&proc_subdir_lock);
+                de = next;
+        }
+        pde_put(root);
+        return 0;
+}
+EXPORT_SYMBOL(remove_proc_subtree);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26bac..9c7fab1d23f0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/bitops.h>
+#include <linux/user_namespace.h>
 #include <linux/mount.h>
 #include <linux/pid_namespace.h>
 #include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
        } else {
                ns = task_active_pid_ns(current);
                options = data;
+                if (!current_user_ns()->may_mount_proc)
+                        return ERR_PTR(-EPERM);
        }
        sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/read_write.c b/fs/read_write.c
index a698eff457fb..e6ddc8dceb96 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
 #include <linux/splice.h>
 #include <linux/compat.h>
 #include "read_write.h"
+#include "internal.h"
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 EXPORT_SYMBOL(do_sync_write);
+ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
+{
+        mm_segment_t old_fs;
+        const char __user *p;
+        ssize_t ret;
+        if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
+                return -EINVAL;
+        old_fs = get_fs();
+        set_fs(get_ds());
+        p = (__force const char __user *)buf;
+        if (count > MAX_RW_COUNT)
+                count =  MAX_RW_COUNT;
+        if (file->f_op->write)
+                ret = file->f_op->write(file, p, count, pos);
+        else
+                ret = do_sync_write(file, p, count, pos);
+        set_fs(old_fs);
+        if (ret > 0) {
+                fsnotify_modify(file);
+                add_wchar(current, ret);
+        }
+        inc_syscw(current);
+        return ret;
+}
 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
 {
        ssize_t ret;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c196369fe408..4cce1d9552fb 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
        if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
                return -ENOSPC;
-        if (name[0] == '.' && (name[1] == '\0' ||
+        if (name[0] == '.' && (namelen < 2 ||
-                               (name[1] == '.' && name[2] == '\0')))
+                               (namelen == 2 && name[1] == '.')))
                return 0;
        dentry = lookup_one_len(name, dbuf->xadir, namelen);
diff --git a/fs/splice.c b/fs/splice.c
index 718bd0056384..29e394e49ddd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/gfp.h>
 #include <linux/socket.h>
+#include "internal.h"
 /*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 {
        int ret;
        void *data;
+        loff_t tmp = sd->pos;
        data = buf->ops->map(pipe, buf, 0);
-        ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
+        ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
        buf->ops->unmap(pipe, buf, data);
        return ret;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2fbdff6be25c..e14512678c9b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
                ino = parent_sd->s_ino;
                if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
                        filp->f_pos++;
+                else
+                        return 0;
        }
        if (filp->f_pos == 1) {
                if (parent_sd->s_parent)
@@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        ino = parent_sd->s_ino;
                if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
                        filp->f_pos++;
+                else
+                        return 0;
        }
        mutex_lock(&sysfs_mutex);
        for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
@@ -1058,10 +1062,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
        return 0;
 }
+static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+        struct inode *inode = file_inode(file);
+        loff_t ret;
+        mutex_lock(&inode->i_mutex);
+        ret = generic_file_llseek(file, offset, whence);
+        mutex_unlock(&inode->i_mutex);
+        return ret;
+}
 const struct file_operations sysfs_dir_operations = {
        .read           = generic_read_dir,
        .readdir        = sysfs_readdir,
        .release        = sysfs_dir_release,
-        .llseek         = generic_file_llseek,
+        .llseek         = sysfs_dir_llseek,
 };
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5ec733..afd83273e6ce 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/user_namespace.h>
 #include "sysfs.h"
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
        struct super_block *sb;
        int error;
+        if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
+                return ERR_PTR(-EPERM);
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info)
                return ERR_PTR(-ENOMEM);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ac838b844936..f21acf0ef01f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1568,6 +1568,12 @@ static int ubifs_remount_rw(struct ubifs_info *c)
        c->remounting_rw = 1;
        c->ro_mount = 0;
+        if (c->space_fixup) {
+                err = ubifs_fixup_free_space(c);
+                if (err)
+                        goto out; /* use the common error exit like the other failure paths, not a bare return */
+        }
        err = check_free_space(c);
        if (err)
                goto out;
@@ -1684,12 +1690,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
                err = dbg_check_space_info(c);
        }
-        if (c->space_fixup) {
-                err = ubifs_fixup_free_space(c);
-                if (err)
-                        goto out;
-        }
        mutex_unlock(&c->umount_mutex);
        return err;