aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/btrfs/ctree.c30
-rw-r--r--fs/btrfs/disk-io.c14
-rw-r--r--fs/btrfs/extent-tree.c84
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file-item.c6
-rw-r--r--fs/btrfs/file.c9
-rw-r--r--fs/btrfs/inode.c22
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/scrub.c3
-rw-r--r--fs/btrfs/send.c10
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/ext4/extents.c11
-rw-r--r--fs/ext4/indirect.c4
-rw-r--r--fs/gfs2/file.c5
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/lock_dlm.c39
-rw-r--r--fs/gfs2/rgrp.c32
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namespace.c54
-rw-r--r--fs/nfs/blocklayout/blocklayoutdm.c4
-rw-r--r--fs/nfs/idmap.c13
-rw-r--r--fs/nfs/nfs4filelayout.c1
-rw-r--r--fs/nfs/nfs4proc.c16
-rw-r--r--fs/nfs/pnfs.c81
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/nfscache.c11
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/pnode.c6
-rw-r--r--fs/pnode.h1
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/read_write.c28
-rw-r--r--fs/reiserfs/xattr.c4
-rw-r--r--fs/splice.c4
-rw-r--r--fs/sysfs/dir.c17
-rw-r--r--fs/sysfs/mount.c4
-rw-r--r--fs/ubifs/super.c12
41 files changed, 474 insertions, 142 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aea605c98ba6..aae187a7f94a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -551,6 +551,7 @@ struct block_device *bdgrab(struct block_device *bdev)
551 ihold(bdev->bd_inode); 551 ihold(bdev->bd_inode);
552 return bdev; 552 return bdev;
553} 553}
554EXPORT_SYMBOL(bdgrab);
554 555
555long nr_blockdev_pages(void) 556long nr_blockdev_pages(void)
556{ 557{
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ecd25a1b4e51..ca9d8f1a3bb6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -651,6 +651,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
651 if (tree_mod_dont_log(fs_info, NULL)) 651 if (tree_mod_dont_log(fs_info, NULL))
652 return 0; 652 return 0;
653 653
654 __tree_mod_log_free_eb(fs_info, old_root);
655
654 ret = tree_mod_alloc(fs_info, flags, &tm); 656 ret = tree_mod_alloc(fs_info, flags, &tm);
655 if (ret < 0) 657 if (ret < 0)
656 goto out; 658 goto out;
@@ -736,7 +738,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
736static noinline void 738static noinline void
737tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, 739tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
738 struct extent_buffer *src, unsigned long dst_offset, 740 struct extent_buffer *src, unsigned long dst_offset,
739 unsigned long src_offset, int nr_items) 741 unsigned long src_offset, int nr_items, int log_removal)
740{ 742{
741 int ret; 743 int ret;
742 int i; 744 int i;
@@ -750,10 +752,12 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
750 } 752 }
751 753
752 for (i = 0; i < nr_items; i++) { 754 for (i = 0; i < nr_items; i++) {
753 ret = tree_mod_log_insert_key_locked(fs_info, src, 755 if (log_removal) {
754 i + src_offset, 756 ret = tree_mod_log_insert_key_locked(fs_info, src,
755 MOD_LOG_KEY_REMOVE); 757 i + src_offset,
756 BUG_ON(ret < 0); 758 MOD_LOG_KEY_REMOVE);
759 BUG_ON(ret < 0);
760 }
757 ret = tree_mod_log_insert_key_locked(fs_info, dst, 761 ret = tree_mod_log_insert_key_locked(fs_info, dst,
758 i + dst_offset, 762 i + dst_offset,
759 MOD_LOG_KEY_ADD); 763 MOD_LOG_KEY_ADD);
@@ -927,7 +931,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
927 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 931 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
928 BUG_ON(ret); /* -ENOMEM */ 932 BUG_ON(ret); /* -ENOMEM */
929 } 933 }
930 tree_mod_log_free_eb(root->fs_info, buf);
931 clean_tree_block(trans, root, buf); 934 clean_tree_block(trans, root, buf);
932 *last_ref = 1; 935 *last_ref = 1;
933 } 936 }
@@ -1046,6 +1049,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1046 btrfs_set_node_ptr_generation(parent, parent_slot, 1049 btrfs_set_node_ptr_generation(parent, parent_slot,
1047 trans->transid); 1050 trans->transid);
1048 btrfs_mark_buffer_dirty(parent); 1051 btrfs_mark_buffer_dirty(parent);
1052 tree_mod_log_free_eb(root->fs_info, buf);
1049 btrfs_free_tree_block(trans, root, buf, parent_start, 1053 btrfs_free_tree_block(trans, root, buf, parent_start,
1050 last_ref); 1054 last_ref);
1051 } 1055 }
@@ -1750,7 +1754,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1750 goto enospc; 1754 goto enospc;
1751 } 1755 }
1752 1756
1753 tree_mod_log_free_eb(root->fs_info, root->node);
1754 tree_mod_log_set_root_pointer(root, child); 1757 tree_mod_log_set_root_pointer(root, child);
1755 rcu_assign_pointer(root->node, child); 1758 rcu_assign_pointer(root->node, child);
1756 1759
@@ -2995,7 +2998,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2995 push_items = min(src_nritems - 8, push_items); 2998 push_items = min(src_nritems - 8, push_items);
2996 2999
2997 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, 3000 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
2998 push_items); 3001 push_items, 1);
2999 copy_extent_buffer(dst, src, 3002 copy_extent_buffer(dst, src,
3000 btrfs_node_key_ptr_offset(dst_nritems), 3003 btrfs_node_key_ptr_offset(dst_nritems),
3001 btrfs_node_key_ptr_offset(0), 3004 btrfs_node_key_ptr_offset(0),
@@ -3066,7 +3069,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3066 sizeof(struct btrfs_key_ptr)); 3069 sizeof(struct btrfs_key_ptr));
3067 3070
3068 tree_mod_log_eb_copy(root->fs_info, dst, src, 0, 3071 tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3069 src_nritems - push_items, push_items); 3072 src_nritems - push_items, push_items, 1);
3070 copy_extent_buffer(dst, src, 3073 copy_extent_buffer(dst, src,
3071 btrfs_node_key_ptr_offset(0), 3074 btrfs_node_key_ptr_offset(0),
3072 btrfs_node_key_ptr_offset(src_nritems - push_items), 3075 btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3218,12 +3221,18 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3218 int mid; 3221 int mid;
3219 int ret; 3222 int ret;
3220 u32 c_nritems; 3223 u32 c_nritems;
3224 int tree_mod_log_removal = 1;
3221 3225
3222 c = path->nodes[level]; 3226 c = path->nodes[level];
3223 WARN_ON(btrfs_header_generation(c) != trans->transid); 3227 WARN_ON(btrfs_header_generation(c) != trans->transid);
3224 if (c == root->node) { 3228 if (c == root->node) {
3225 /* trying to split the root, lets make a new one */ 3229 /* trying to split the root, lets make a new one */
3226 ret = insert_new_root(trans, root, path, level + 1); 3230 ret = insert_new_root(trans, root, path, level + 1);
3231 /*
3232 * removal of root nodes has been logged by
3233 * tree_mod_log_set_root_pointer due to locking
3234 */
3235 tree_mod_log_removal = 0;
3227 if (ret) 3236 if (ret)
3228 return ret; 3237 return ret;
3229 } else { 3238 } else {
@@ -3261,7 +3270,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3261 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3270 (unsigned long)btrfs_header_chunk_tree_uuid(split),
3262 BTRFS_UUID_SIZE); 3271 BTRFS_UUID_SIZE);
3263 3272
3264 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3273 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid,
3274 tree_mod_log_removal);
3265 copy_extent_buffer(split, c, 3275 copy_extent_buffer(split, c,
3266 btrfs_node_key_ptr_offset(0), 3276 btrfs_node_key_ptr_offset(0),
3267 btrfs_node_key_ptr_offset(mid), 3277 btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7d84651e850b..6d19a0a554aa 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1291,6 +1291,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1291 0, objectid, NULL, 0, 0, 0); 1291 0, objectid, NULL, 0, 0, 0);
1292 if (IS_ERR(leaf)) { 1292 if (IS_ERR(leaf)) {
1293 ret = PTR_ERR(leaf); 1293 ret = PTR_ERR(leaf);
1294 leaf = NULL;
1294 goto fail; 1295 goto fail;
1295 } 1296 }
1296 1297
@@ -1334,11 +1335,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1334 1335
1335 btrfs_tree_unlock(leaf); 1336 btrfs_tree_unlock(leaf);
1336 1337
1338 return root;
1339
1337fail: 1340fail:
1338 if (ret) 1341 if (leaf) {
1339 return ERR_PTR(ret); 1342 btrfs_tree_unlock(leaf);
1343 free_extent_buffer(leaf);
1344 }
1345 kfree(root);
1340 1346
1341 return root; 1347 return ERR_PTR(ret);
1342} 1348}
1343 1349
1344static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, 1350static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -3253,7 +3259,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3253 if (btrfs_root_refs(&root->root_item) == 0) 3259 if (btrfs_root_refs(&root->root_item) == 0)
3254 synchronize_srcu(&fs_info->subvol_srcu); 3260 synchronize_srcu(&fs_info->subvol_srcu);
3255 3261
3256 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 3262 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
3257 btrfs_free_log(NULL, root); 3263 btrfs_free_log(NULL, root);
3258 btrfs_free_log_root_tree(NULL, fs_info); 3264 btrfs_free_log_root_tree(NULL, fs_info);
3259 } 3265 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9ac2eca681eb..3d551231caba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -257,7 +257,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
257 cache->bytes_super += stripe_len; 257 cache->bytes_super += stripe_len;
258 ret = add_excluded_extent(root, cache->key.objectid, 258 ret = add_excluded_extent(root, cache->key.objectid,
259 stripe_len); 259 stripe_len);
260 BUG_ON(ret); /* -ENOMEM */ 260 if (ret)
261 return ret;
261 } 262 }
262 263
263 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 264 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -265,13 +266,17 @@ static int exclude_super_stripes(struct btrfs_root *root,
265 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 266 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
266 cache->key.objectid, bytenr, 267 cache->key.objectid, bytenr,
267 0, &logical, &nr, &stripe_len); 268 0, &logical, &nr, &stripe_len);
268 BUG_ON(ret); /* -ENOMEM */ 269 if (ret)
270 return ret;
269 271
270 while (nr--) { 272 while (nr--) {
271 cache->bytes_super += stripe_len; 273 cache->bytes_super += stripe_len;
272 ret = add_excluded_extent(root, logical[nr], 274 ret = add_excluded_extent(root, logical[nr],
273 stripe_len); 275 stripe_len);
274 BUG_ON(ret); /* -ENOMEM */ 276 if (ret) {
277 kfree(logical);
278 return ret;
279 }
275 } 280 }
276 281
277 kfree(logical); 282 kfree(logical);
@@ -4438,7 +4443,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4438 spin_lock(&sinfo->lock); 4443 spin_lock(&sinfo->lock);
4439 spin_lock(&block_rsv->lock); 4444 spin_lock(&block_rsv->lock);
4440 4445
4441 block_rsv->size = num_bytes; 4446 block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
4442 4447
4443 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 4448 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
4444 sinfo->bytes_reserved + sinfo->bytes_readonly + 4449 sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -4793,14 +4798,49 @@ out_fail:
4793 * If the inodes csum_bytes is the same as the original 4798 * If the inodes csum_bytes is the same as the original
4794 * csum_bytes then we know we haven't raced with any free()ers 4799 * csum_bytes then we know we haven't raced with any free()ers
4795 * so we can just reduce our inodes csum bytes and carry on. 4800 * so we can just reduce our inodes csum bytes and carry on.
4796 * Otherwise we have to do the normal free thing to account for
4797 * the case that the free side didn't free up its reserve
4798 * because of this outstanding reservation.
4799 */ 4801 */
4800 if (BTRFS_I(inode)->csum_bytes == csum_bytes) 4802 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
4801 calc_csum_metadata_size(inode, num_bytes, 0); 4803 calc_csum_metadata_size(inode, num_bytes, 0);
4802 else 4804 } else {
4803 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 4805 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
4806 u64 bytes;
4807
4808 /*
4809 * This is tricky, but first we need to figure out how much we
4810 * free'd from any free-ers that occured during this
4811 * reservation, so we reset ->csum_bytes to the csum_bytes
4812 * before we dropped our lock, and then call the free for the
4813 * number of bytes that were freed while we were trying our
4814 * reservation.
4815 */
4816 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
4817 BTRFS_I(inode)->csum_bytes = csum_bytes;
4818 to_free = calc_csum_metadata_size(inode, bytes, 0);
4819
4820
4821 /*
4822 * Now we need to see how much we would have freed had we not
4823 * been making this reservation and our ->csum_bytes were not
4824 * artificially inflated.
4825 */
4826 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
4827 bytes = csum_bytes - orig_csum_bytes;
4828 bytes = calc_csum_metadata_size(inode, bytes, 0);
4829
4830 /*
4831 * Now reset ->csum_bytes to what it should be. If bytes is
4832 * more than to_free then we would have free'd more space had we
4833 * not had an artificially high ->csum_bytes, so we need to free
4834 * the remainder. If bytes is the same or less then we don't
4835 * need to do anything, the other free-ers did the correct
4836 * thing.
4837 */
4838 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
4839 if (bytes > to_free)
4840 to_free = bytes - to_free;
4841 else
4842 to_free = 0;
4843 }
4804 spin_unlock(&BTRFS_I(inode)->lock); 4844 spin_unlock(&BTRFS_I(inode)->lock);
4805 if (dropped) 4845 if (dropped)
4806 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4846 to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -7947,7 +7987,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7947 * info has super bytes accounted for, otherwise we'll think 7987 * info has super bytes accounted for, otherwise we'll think
7948 * we have more space than we actually do. 7988 * we have more space than we actually do.
7949 */ 7989 */
7950 exclude_super_stripes(root, cache); 7990 ret = exclude_super_stripes(root, cache);
7991 if (ret) {
7992 /*
7993 * We may have excluded something, so call this just in
7994 * case.
7995 */
7996 free_excluded_extents(root, cache);
7997 kfree(cache->free_space_ctl);
7998 kfree(cache);
7999 goto error;
8000 }
7951 8001
7952 /* 8002 /*
7953 * check for two cases, either we are full, and therefore 8003 * check for two cases, either we are full, and therefore
@@ -8089,7 +8139,17 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8089 8139
8090 cache->last_byte_to_unpin = (u64)-1; 8140 cache->last_byte_to_unpin = (u64)-1;
8091 cache->cached = BTRFS_CACHE_FINISHED; 8141 cache->cached = BTRFS_CACHE_FINISHED;
8092 exclude_super_stripes(root, cache); 8142 ret = exclude_super_stripes(root, cache);
8143 if (ret) {
8144 /*
8145 * We may have excluded something, so call this just in
8146 * case.
8147 */
8148 free_excluded_extents(root, cache);
8149 kfree(cache->free_space_ctl);
8150 kfree(cache);
8151 return ret;
8152 }
8093 8153
8094 add_new_free_space(cache, root->fs_info, chunk_offset, 8154 add_new_free_space(cache, root->fs_info, chunk_offset,
8095 chunk_offset + size); 8155 chunk_offset + size);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..cdee391fc7bf 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1257,6 +1257,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1257 GFP_NOFS); 1257 GFP_NOFS);
1258} 1258}
1259 1259
1260int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1261{
1262 unsigned long index = start >> PAGE_CACHE_SHIFT;
1263 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1264 struct page *page;
1265
1266 while (index <= end_index) {
1267 page = find_get_page(inode->i_mapping, index);
1268 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1269 clear_page_dirty_for_io(page);
1270 page_cache_release(page);
1271 index++;
1272 }
1273 return 0;
1274}
1275
1276int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1277{
1278 unsigned long index = start >> PAGE_CACHE_SHIFT;
1279 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1280 struct page *page;
1281
1282 while (index <= end_index) {
1283 page = find_get_page(inode->i_mapping, index);
1284 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1285 account_page_redirty(page);
1286 __set_page_dirty_nobuffers(page);
1287 page_cache_release(page);
1288 index++;
1289 }
1290 return 0;
1291}
1292
1260/* 1293/*
1261 * helper function to set both pages and extents in the tree writeback 1294 * helper function to set both pages and extents in the tree writeback
1262 */ 1295 */
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..258c92156857 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -325,6 +325,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
325 unsigned long *map_len); 325 unsigned long *map_len);
326int extent_range_uptodate(struct extent_io_tree *tree, 326int extent_range_uptodate(struct extent_io_tree *tree,
327 u64 start, u64 end); 327 u64 start, u64 end);
328int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
329int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
328int extent_clear_unlock_delalloc(struct inode *inode, 330int extent_clear_unlock_delalloc(struct inode *inode,
329 struct extent_io_tree *tree, 331 struct extent_io_tree *tree,
330 u64 start, u64 end, struct page *locked_page, 332 u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ec160202be3e..c4628a201cb3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -118,9 +118,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
119 csums_in_item /= csum_size; 119 csums_in_item /= csum_size;
120 120
121 if (csum_offset >= csums_in_item) { 121 if (csum_offset == csums_in_item) {
122 ret = -EFBIG; 122 ret = -EFBIG;
123 goto fail; 123 goto fail;
124 } else if (csum_offset > csums_in_item) {
125 goto fail;
124 } 126 }
125 } 127 }
126 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 128 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -728,7 +730,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
728 return -ENOMEM; 730 return -ENOMEM;
729 731
730 sector_sum = sums->sums; 732 sector_sum = sums->sums;
731 trans->adding_csums = 1;
732again: 733again:
733 next_offset = (u64)-1; 734 next_offset = (u64)-1;
734 found_next = 0; 735 found_next = 0;
@@ -899,7 +900,6 @@ next_sector:
899 goto again; 900 goto again;
900 } 901 }
901out: 902out:
902 trans->adding_csums = 0;
903 btrfs_free_path(path); 903 btrfs_free_path(path);
904 return ret; 904 return ret;
905 905
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5b4ea5f55b8f..ade03e6f7bd2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2142,6 +2142,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2142{ 2142{
2143 struct inode *inode = file_inode(file); 2143 struct inode *inode = file_inode(file);
2144 struct extent_state *cached_state = NULL; 2144 struct extent_state *cached_state = NULL;
2145 struct btrfs_root *root = BTRFS_I(inode)->root;
2145 u64 cur_offset; 2146 u64 cur_offset;
2146 u64 last_byte; 2147 u64 last_byte;
2147 u64 alloc_start; 2148 u64 alloc_start;
@@ -2169,6 +2170,11 @@ static long btrfs_fallocate(struct file *file, int mode,
2169 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); 2170 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
2170 if (ret) 2171 if (ret)
2171 return ret; 2172 return ret;
2173 if (root->fs_info->quota_enabled) {
2174 ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
2175 if (ret)
2176 goto out_reserve_fail;
2177 }
2172 2178
2173 /* 2179 /*
2174 * wait for ordered IO before we have any locks. We'll loop again 2180 * wait for ordered IO before we have any locks. We'll loop again
@@ -2272,6 +2278,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2272 &cached_state, GFP_NOFS); 2278 &cached_state, GFP_NOFS);
2273out: 2279out:
2274 mutex_unlock(&inode->i_mutex); 2280 mutex_unlock(&inode->i_mutex);
2281 if (root->fs_info->quota_enabled)
2282 btrfs_qgroup_free(root, alloc_end - alloc_start);
2283out_reserve_fail:
2275 /* Let go of our reservation. */ 2284 /* Let go of our reservation. */
2276 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); 2285 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
2277 return ret; 2286 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca1b767d51f7..09c58a35b429 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -353,6 +353,7 @@ static noinline int compress_file_range(struct inode *inode,
353 int i; 353 int i;
354 int will_compress; 354 int will_compress;
355 int compress_type = root->fs_info->compress_type; 355 int compress_type = root->fs_info->compress_type;
356 int redirty = 0;
356 357
357 /* if this is a small write inside eof, kick off a defrag */ 358 /* if this is a small write inside eof, kick off a defrag */
358 if ((end - start + 1) < 16 * 1024 && 359 if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +416,17 @@ again:
415 if (BTRFS_I(inode)->force_compress) 416 if (BTRFS_I(inode)->force_compress)
416 compress_type = BTRFS_I(inode)->force_compress; 417 compress_type = BTRFS_I(inode)->force_compress;
417 418
419 /*
420 * we need to call clear_page_dirty_for_io on each
421 * page in the range. Otherwise applications with the file
422 * mmap'd can wander in and change the page contents while
423 * we are compressing them.
424 *
425 * If the compression fails for any reason, we set the pages
426 * dirty again later on.
427 */
428 extent_range_clear_dirty_for_io(inode, start, end);
429 redirty = 1;
418 ret = btrfs_compress_pages(compress_type, 430 ret = btrfs_compress_pages(compress_type,
419 inode->i_mapping, start, 431 inode->i_mapping, start,
420 total_compressed, pages, 432 total_compressed, pages,
@@ -554,6 +566,8 @@ cleanup_and_bail_uncompressed:
554 __set_page_dirty_nobuffers(locked_page); 566 __set_page_dirty_nobuffers(locked_page);
555 /* unlocked later on in the async handlers */ 567 /* unlocked later on in the async handlers */
556 } 568 }
569 if (redirty)
570 extent_range_redirty_for_io(inode, start, end);
557 add_async_extent(async_cow, start, end - start + 1, 571 add_async_extent(async_cow, start, end - start + 1,
558 0, NULL, 0, BTRFS_COMPRESS_NONE); 572 0, NULL, 0, BTRFS_COMPRESS_NONE);
559 *num_added += 1; 573 *num_added += 1;
@@ -1743,8 +1757,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1743 struct btrfs_ordered_sum *sum; 1757 struct btrfs_ordered_sum *sum;
1744 1758
1745 list_for_each_entry(sum, list, list) { 1759 list_for_each_entry(sum, list, list) {
1760 trans->adding_csums = 1;
1746 btrfs_csum_file_blocks(trans, 1761 btrfs_csum_file_blocks(trans,
1747 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1762 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1763 trans->adding_csums = 0;
1748 } 1764 }
1749 return 0; 1765 return 0;
1750} 1766}
@@ -3679,11 +3695,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
3679 * 1 for the dir item 3695 * 1 for the dir item
3680 * 1 for the dir index 3696 * 1 for the dir index
3681 * 1 for the inode ref 3697 * 1 for the inode ref
3682 * 1 for the inode ref in the tree log
3683 * 2 for the dir entries in the log
3684 * 1 for the inode 3698 * 1 for the inode
3685 */ 3699 */
3686 trans = btrfs_start_transaction(root, 8); 3700 trans = btrfs_start_transaction(root, 5);
3687 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3701 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3688 return trans; 3702 return trans;
3689 3703
@@ -8127,7 +8141,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8127 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items 8141 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
8128 * should cover the worst case number of items we'll modify. 8142 * should cover the worst case number of items we'll modify.
8129 */ 8143 */
8130 trans = btrfs_start_transaction(root, 20); 8144 trans = btrfs_start_transaction(root, 11);
8131 if (IS_ERR(trans)) { 8145 if (IS_ERR(trans)) {
8132 ret = PTR_ERR(trans); 8146 ret = PTR_ERR(trans);
8133 goto out_notrans; 8147 goto out_notrans;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dc08d77b717e..005c45db699e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
557 INIT_LIST_HEAD(&splice); 557 INIT_LIST_HEAD(&splice);
558 INIT_LIST_HEAD(&works); 558 INIT_LIST_HEAD(&works);
559 559
560 mutex_lock(&root->fs_info->ordered_operations_mutex);
560 spin_lock(&root->fs_info->ordered_extent_lock); 561 spin_lock(&root->fs_info->ordered_extent_lock);
561 list_splice_init(&root->fs_info->ordered_extents, &splice); 562 list_splice_init(&root->fs_info->ordered_extents, &splice);
562 while (!list_empty(&splice)) { 563 while (!list_empty(&splice)) {
@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
600 601
601 cond_resched(); 602 cond_resched();
602 } 603 }
604 mutex_unlock(&root->fs_info->ordered_operations_mutex);
603} 605}
604 606
605/* 607/*
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5471e47d6559..b44124dd2370 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1153,7 +1153,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, 1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
1154 sgn > 0 ? node->seq - 1 : node->seq, &roots); 1154 sgn > 0 ? node->seq - 1 : node->seq, &roots);
1155 if (ret < 0) 1155 if (ret < 0)
1156 goto out; 1156 return ret;
1157 1157
1158 spin_lock(&fs_info->qgroup_lock); 1158 spin_lock(&fs_info->qgroup_lock);
1159 quota_root = fs_info->quota_root; 1159 quota_root = fs_info->quota_root;
@@ -1275,7 +1275,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1275 ret = 0; 1275 ret = 0;
1276unlock: 1276unlock:
1277 spin_unlock(&fs_info->qgroup_lock); 1277 spin_unlock(&fs_info->qgroup_lock);
1278out:
1279 ulist_free(roots); 1278 ulist_free(roots);
1280 ulist_free(tmp); 1279 ulist_free(tmp);
1281 1280
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 53c3501fa4ca..85e072b956d5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
542 eb = path->nodes[0]; 542 eb = path->nodes[0];
543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
544 item_size = btrfs_item_size_nr(eb, path->slots[0]); 544 item_size = btrfs_item_size_nr(eb, path->slots[0]);
545 btrfs_release_path(path);
546 545
547 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
548 do { 547 do {
@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
558 ret < 0 ? -1 : ref_level, 557 ret < 0 ? -1 : ref_level,
559 ret < 0 ? -1 : ref_root); 558 ret < 0 ? -1 : ref_root);
560 } while (ret != 1); 559 } while (ret != 1);
560 btrfs_release_path(path);
561 } else { 561 } else {
562 btrfs_release_path(path);
562 swarn.path = path; 563 swarn.path = path;
563 swarn.dev = dev; 564 swarn.dev = dev;
564 iterate_extent_inodes(fs_info, found_key.objectid, 565 iterate_extent_inodes(fs_info, found_key.objectid,
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7a8b861058b..c85e7c6b4598 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3945,12 +3945,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3945 found_key.type != key.type) { 3945 found_key.type != key.type) {
3946 key.offset += right_len; 3946 key.offset += right_len;
3947 break; 3947 break;
3948 } else { 3948 }
3949 if (found_key.offset != key.offset + right_len) { 3949 if (found_key.offset != key.offset + right_len) {
3950 /* Should really not happen */ 3950 ret = 0;
3951 ret = -EIO; 3951 goto out;
3952 goto out;
3953 }
3954 } 3952 }
3955 key = found_key; 3953 key = found_key;
3956 } 3954 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5989a92236f7..2854c824ab64 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4935,7 +4935,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
4935 em = lookup_extent_mapping(em_tree, chunk_start, 1); 4935 em = lookup_extent_mapping(em_tree, chunk_start, 1);
4936 read_unlock(&em_tree->lock); 4936 read_unlock(&em_tree->lock);
4937 4937
4938 BUG_ON(!em || em->start != chunk_start); 4938 if (!em) {
4939 printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
4940 chunk_start);
4941 return -EIO;
4942 }
4943
4944 if (em->start != chunk_start) {
4945 printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
4946 em->start, chunk_start);
4947 free_extent_map(em);
4948 return -EIO;
4949 }
4939 map = (struct map_lookup *)em->bdev; 4950 map = (struct map_lookup *)em->bdev;
4940 4951
4941 length = em->len; 4952 length = em->len;
diff --git a/fs/dcache.c b/fs/dcache.c
index fbfae008ba44..e8bc3420d63e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2542,7 +2542,6 @@ static int prepend_path(const struct path *path,
2542 bool slash = false; 2542 bool slash = false;
2543 int error = 0; 2543 int error = 0;
2544 2544
2545 br_read_lock(&vfsmount_lock);
2546 while (dentry != root->dentry || vfsmnt != root->mnt) { 2545 while (dentry != root->dentry || vfsmnt != root->mnt) {
2547 struct dentry * parent; 2546 struct dentry * parent;
2548 2547
@@ -2572,8 +2571,6 @@ static int prepend_path(const struct path *path,
2572 if (!error && !slash) 2571 if (!error && !slash)
2573 error = prepend(buffer, buflen, "/", 1); 2572 error = prepend(buffer, buflen, "/", 1);
2574 2573
2575out:
2576 br_read_unlock(&vfsmount_lock);
2577 return error; 2574 return error;
2578 2575
2579global_root: 2576global_root:
@@ -2590,7 +2587,7 @@ global_root:
2590 error = prepend(buffer, buflen, "/", 1); 2587 error = prepend(buffer, buflen, "/", 1);
2591 if (!error) 2588 if (!error)
2592 error = is_mounted(vfsmnt) ? 1 : 2; 2589 error = is_mounted(vfsmnt) ? 1 : 2;
2593 goto out; 2590 return error;
2594} 2591}
2595 2592
2596/** 2593/**
@@ -2617,9 +2614,11 @@ char *__d_path(const struct path *path,
2617 int error; 2614 int error;
2618 2615
2619 prepend(&res, &buflen, "\0", 1); 2616 prepend(&res, &buflen, "\0", 1);
2617 br_read_lock(&vfsmount_lock);
2620 write_seqlock(&rename_lock); 2618 write_seqlock(&rename_lock);
2621 error = prepend_path(path, root, &res, &buflen); 2619 error = prepend_path(path, root, &res, &buflen);
2622 write_sequnlock(&rename_lock); 2620 write_sequnlock(&rename_lock);
2621 br_read_unlock(&vfsmount_lock);
2623 2622
2624 if (error < 0) 2623 if (error < 0)
2625 return ERR_PTR(error); 2624 return ERR_PTR(error);
@@ -2636,9 +2635,11 @@ char *d_absolute_path(const struct path *path,
2636 int error; 2635 int error;
2637 2636
2638 prepend(&res, &buflen, "\0", 1); 2637 prepend(&res, &buflen, "\0", 1);
2638 br_read_lock(&vfsmount_lock);
2639 write_seqlock(&rename_lock); 2639 write_seqlock(&rename_lock);
2640 error = prepend_path(path, &root, &res, &buflen); 2640 error = prepend_path(path, &root, &res, &buflen);
2641 write_sequnlock(&rename_lock); 2641 write_sequnlock(&rename_lock);
2642 br_read_unlock(&vfsmount_lock);
2642 2643
2643 if (error > 1) 2644 if (error > 1)
2644 error = -EINVAL; 2645 error = -EINVAL;
@@ -2702,11 +2703,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
2702 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2703 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2703 2704
2704 get_fs_root(current->fs, &root); 2705 get_fs_root(current->fs, &root);
2706 br_read_lock(&vfsmount_lock);
2705 write_seqlock(&rename_lock); 2707 write_seqlock(&rename_lock);
2706 error = path_with_deleted(path, &root, &res, &buflen); 2708 error = path_with_deleted(path, &root, &res, &buflen);
2709 write_sequnlock(&rename_lock);
2710 br_read_unlock(&vfsmount_lock);
2707 if (error < 0) 2711 if (error < 0)
2708 res = ERR_PTR(error); 2712 res = ERR_PTR(error);
2709 write_sequnlock(&rename_lock);
2710 path_put(&root); 2713 path_put(&root);
2711 return res; 2714 return res;
2712} 2715}
@@ -2830,6 +2833,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2830 get_fs_root_and_pwd(current->fs, &root, &pwd); 2833 get_fs_root_and_pwd(current->fs, &root, &pwd);
2831 2834
2832 error = -ENOENT; 2835 error = -ENOENT;
2836 br_read_lock(&vfsmount_lock);
2833 write_seqlock(&rename_lock); 2837 write_seqlock(&rename_lock);
2834 if (!d_unlinked(pwd.dentry)) { 2838 if (!d_unlinked(pwd.dentry)) {
2835 unsigned long len; 2839 unsigned long len;
@@ -2839,6 +2843,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2839 prepend(&cwd, &buflen, "\0", 1); 2843 prepend(&cwd, &buflen, "\0", 1);
2840 error = prepend_path(&pwd, &root, &cwd, &buflen); 2844 error = prepend_path(&pwd, &root, &cwd, &buflen);
2841 write_sequnlock(&rename_lock); 2845 write_sequnlock(&rename_lock);
2846 br_read_unlock(&vfsmount_lock);
2842 2847
2843 if (error < 0) 2848 if (error < 0)
2844 goto out; 2849 goto out;
@@ -2859,6 +2864,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2859 } 2864 }
2860 } else { 2865 } else {
2861 write_sequnlock(&rename_lock); 2866 write_sequnlock(&rename_lock);
2867 br_read_unlock(&vfsmount_lock);
2862 } 2868 }
2863 2869
2864out: 2870out:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 56efcaadf848..9c6d06dcef8b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2999,20 +2999,23 @@ static int ext4_split_extent_at(handle_t *handle,
2999 if (split_flag & EXT4_EXT_DATA_VALID1) { 2999 if (split_flag & EXT4_EXT_DATA_VALID1) {
3000 err = ext4_ext_zeroout(inode, ex2); 3000 err = ext4_ext_zeroout(inode, ex2);
3001 zero_ex.ee_block = ex2->ee_block; 3001 zero_ex.ee_block = ex2->ee_block;
3002 zero_ex.ee_len = ext4_ext_get_actual_len(ex2); 3002 zero_ex.ee_len = cpu_to_le16(
3003 ext4_ext_get_actual_len(ex2));
3003 ext4_ext_store_pblock(&zero_ex, 3004 ext4_ext_store_pblock(&zero_ex,
3004 ext4_ext_pblock(ex2)); 3005 ext4_ext_pblock(ex2));
3005 } else { 3006 } else {
3006 err = ext4_ext_zeroout(inode, ex); 3007 err = ext4_ext_zeroout(inode, ex);
3007 zero_ex.ee_block = ex->ee_block; 3008 zero_ex.ee_block = ex->ee_block;
3008 zero_ex.ee_len = ext4_ext_get_actual_len(ex); 3009 zero_ex.ee_len = cpu_to_le16(
3010 ext4_ext_get_actual_len(ex));
3009 ext4_ext_store_pblock(&zero_ex, 3011 ext4_ext_store_pblock(&zero_ex,
3010 ext4_ext_pblock(ex)); 3012 ext4_ext_pblock(ex));
3011 } 3013 }
3012 } else { 3014 } else {
3013 err = ext4_ext_zeroout(inode, &orig_ex); 3015 err = ext4_ext_zeroout(inode, &orig_ex);
3014 zero_ex.ee_block = orig_ex.ee_block; 3016 zero_ex.ee_block = orig_ex.ee_block;
3015 zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex); 3017 zero_ex.ee_len = cpu_to_le16(
3018 ext4_ext_get_actual_len(&orig_ex));
3016 ext4_ext_store_pblock(&zero_ex, 3019 ext4_ext_store_pblock(&zero_ex,
3017 ext4_ext_pblock(&orig_ex)); 3020 ext4_ext_pblock(&orig_ex));
3018 } 3021 }
@@ -3272,7 +3275,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3272 if (err) 3275 if (err)
3273 goto out; 3276 goto out;
3274 zero_ex.ee_block = ex->ee_block; 3277 zero_ex.ee_block = ex->ee_block;
3275 zero_ex.ee_len = ext4_ext_get_actual_len(ex); 3278 zero_ex.ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex));
3276 ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex)); 3279 ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
3277 3280
3278 err = ext4_ext_get_access(handle, inode, path + depth); 3281 err = ext4_ext_get_access(handle, inode, path + depth);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b505a145a593..a04183127ef0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1539,9 +1539,9 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
1539 blk = *i_data; 1539 blk = *i_data;
1540 if (level > 0) { 1540 if (level > 0) {
1541 ext4_lblk_t first2; 1541 ext4_lblk_t first2;
1542 bh = sb_bread(inode->i_sb, blk); 1542 bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
1543 if (!bh) { 1543 if (!bh) {
1544 EXT4_ERROR_INODE_BLOCK(inode, blk, 1544 EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
1545 "Read failure"); 1545 "Read failure");
1546 return -EIO; 1546 return -EIO;
1547 } 1547 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 019f45e45097..d79c2dadc536 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -923,8 +923,11 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
923 cmd = F_SETLK; 923 cmd = F_SETLK;
924 fl->fl_type = F_UNLCK; 924 fl->fl_type = F_UNLCK;
925 } 925 }
926 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 926 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
927 if (fl->fl_type == F_UNLCK)
928 posix_lock_file_wait(file, fl);
927 return -EIO; 929 return -EIO;
930 }
928 if (IS_GETLK(cmd)) 931 if (IS_GETLK(cmd))
929 return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); 932 return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
930 else if (fl->fl_type == F_UNLCK) 933 else if (fl->fl_type == F_UNLCK)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 156e42ec84ea..5c29216e9cc1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -588,6 +588,7 @@ struct lm_lockstruct {
588 struct dlm_lksb ls_control_lksb; /* control_lock */ 588 struct dlm_lksb ls_control_lksb; /* control_lock */
589 char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ 589 char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
590 struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ 590 struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
591 char *ls_lvb_bits;
591 592
592 spinlock_t ls_recover_spin; /* protects following fields */ 593 spinlock_t ls_recover_spin; /* protects following fields */
593 unsigned long ls_recover_flags; /* DFL_ */ 594 unsigned long ls_recover_flags; /* DFL_ */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 9802de0f85e6..c8423d6de6c3 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -483,12 +483,8 @@ static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
483 483
484static int all_jid_bits_clear(char *lvb) 484static int all_jid_bits_clear(char *lvb)
485{ 485{
486 int i; 486 return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0,
487 for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { 487 GDLM_LVB_SIZE - JID_BITMAP_OFFSET);
488 if (lvb[i])
489 return 0;
490 }
491 return 1;
492} 488}
493 489
494static void sync_wait_cb(void *arg) 490static void sync_wait_cb(void *arg)
@@ -580,7 +576,6 @@ static void gfs2_control_func(struct work_struct *work)
580{ 576{
581 struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); 577 struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
582 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 578 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
583 char lvb_bits[GDLM_LVB_SIZE];
584 uint32_t block_gen, start_gen, lvb_gen, flags; 579 uint32_t block_gen, start_gen, lvb_gen, flags;
585 int recover_set = 0; 580 int recover_set = 0;
586 int write_lvb = 0; 581 int write_lvb = 0;
@@ -634,7 +629,7 @@ static void gfs2_control_func(struct work_struct *work)
634 return; 629 return;
635 } 630 }
636 631
637 control_lvb_read(ls, &lvb_gen, lvb_bits); 632 control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);
638 633
639 spin_lock(&ls->ls_recover_spin); 634 spin_lock(&ls->ls_recover_spin);
640 if (block_gen != ls->ls_recover_block || 635 if (block_gen != ls->ls_recover_block ||
@@ -664,10 +659,10 @@ static void gfs2_control_func(struct work_struct *work)
664 659
665 ls->ls_recover_result[i] = 0; 660 ls->ls_recover_result[i] = 0;
666 661
667 if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) 662 if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET))
668 continue; 663 continue;
669 664
670 __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); 665 __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
671 write_lvb = 1; 666 write_lvb = 1;
672 } 667 }
673 } 668 }
@@ -691,7 +686,7 @@ static void gfs2_control_func(struct work_struct *work)
691 continue; 686 continue;
692 if (ls->ls_recover_submit[i] < start_gen) { 687 if (ls->ls_recover_submit[i] < start_gen) {
693 ls->ls_recover_submit[i] = 0; 688 ls->ls_recover_submit[i] = 0;
694 __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); 689 __set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
695 } 690 }
696 } 691 }
697 /* even if there are no bits to set, we need to write the 692 /* even if there are no bits to set, we need to write the
@@ -705,7 +700,7 @@ static void gfs2_control_func(struct work_struct *work)
705 spin_unlock(&ls->ls_recover_spin); 700 spin_unlock(&ls->ls_recover_spin);
706 701
707 if (write_lvb) { 702 if (write_lvb) {
708 control_lvb_write(ls, start_gen, lvb_bits); 703 control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
709 flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; 704 flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
710 } else { 705 } else {
711 flags = DLM_LKF_CONVERT; 706 flags = DLM_LKF_CONVERT;
@@ -725,7 +720,7 @@ static void gfs2_control_func(struct work_struct *work)
725 */ 720 */
726 721
727 for (i = 0; i < recover_size; i++) { 722 for (i = 0; i < recover_size; i++) {
728 if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { 723 if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
729 fs_info(sdp, "recover generation %u jid %d\n", 724 fs_info(sdp, "recover generation %u jid %d\n",
730 start_gen, i); 725 start_gen, i);
731 gfs2_recover_set(sdp, i); 726 gfs2_recover_set(sdp, i);
@@ -758,7 +753,6 @@ static void gfs2_control_func(struct work_struct *work)
758static int control_mount(struct gfs2_sbd *sdp) 753static int control_mount(struct gfs2_sbd *sdp)
759{ 754{
760 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 755 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
761 char lvb_bits[GDLM_LVB_SIZE];
762 uint32_t start_gen, block_gen, mount_gen, lvb_gen; 756 uint32_t start_gen, block_gen, mount_gen, lvb_gen;
763 int mounted_mode; 757 int mounted_mode;
764 int retries = 0; 758 int retries = 0;
@@ -857,7 +851,7 @@ locks_done:
857 * lvb_gen will be non-zero. 851 * lvb_gen will be non-zero.
858 */ 852 */
859 853
860 control_lvb_read(ls, &lvb_gen, lvb_bits); 854 control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);
861 855
862 if (lvb_gen == 0xFFFFFFFF) { 856 if (lvb_gen == 0xFFFFFFFF) {
863 /* special value to force mount attempts to fail */ 857 /* special value to force mount attempts to fail */
@@ -887,7 +881,7 @@ locks_done:
887 * and all lvb bits to be clear (no pending journal recoveries.) 881 * and all lvb bits to be clear (no pending journal recoveries.)
888 */ 882 */
889 883
890 if (!all_jid_bits_clear(lvb_bits)) { 884 if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
891 /* journals need recovery, wait until all are clear */ 885 /* journals need recovery, wait until all are clear */
892 fs_info(sdp, "control_mount wait for journal recovery\n"); 886 fs_info(sdp, "control_mount wait for journal recovery\n");
893 goto restart; 887 goto restart;
@@ -949,7 +943,6 @@ static int dlm_recovery_wait(void *word)
949static int control_first_done(struct gfs2_sbd *sdp) 943static int control_first_done(struct gfs2_sbd *sdp)
950{ 944{
951 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 945 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
952 char lvb_bits[GDLM_LVB_SIZE];
953 uint32_t start_gen, block_gen; 946 uint32_t start_gen, block_gen;
954 int error; 947 int error;
955 948
@@ -991,8 +984,8 @@ restart:
991 memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); 984 memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
992 spin_unlock(&ls->ls_recover_spin); 985 spin_unlock(&ls->ls_recover_spin);
993 986
994 memset(lvb_bits, 0, sizeof(lvb_bits)); 987 memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE);
995 control_lvb_write(ls, start_gen, lvb_bits); 988 control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
996 989
997 error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); 990 error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
998 if (error) 991 if (error)
@@ -1022,6 +1015,12 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
1022 uint32_t old_size, new_size; 1015 uint32_t old_size, new_size;
1023 int i, max_jid; 1016 int i, max_jid;
1024 1017
1018 if (!ls->ls_lvb_bits) {
1019 ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
1020 if (!ls->ls_lvb_bits)
1021 return -ENOMEM;
1022 }
1023
1025 max_jid = 0; 1024 max_jid = 0;
1026 for (i = 0; i < num_slots; i++) { 1025 for (i = 0; i < num_slots; i++) {
1027 if (max_jid < slots[i].slot - 1) 1026 if (max_jid < slots[i].slot - 1)
@@ -1057,6 +1056,7 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
1057 1056
1058static void free_recover_size(struct lm_lockstruct *ls) 1057static void free_recover_size(struct lm_lockstruct *ls)
1059{ 1058{
1059 kfree(ls->ls_lvb_bits);
1060 kfree(ls->ls_recover_submit); 1060 kfree(ls->ls_recover_submit);
1061 kfree(ls->ls_recover_result); 1061 kfree(ls->ls_recover_result);
1062 ls->ls_recover_submit = NULL; 1062 ls->ls_recover_submit = NULL;
@@ -1205,6 +1205,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
1205 ls->ls_recover_size = 0; 1205 ls->ls_recover_size = 0;
1206 ls->ls_recover_submit = NULL; 1206 ls->ls_recover_submit = NULL;
1207 ls->ls_recover_result = NULL; 1207 ls->ls_recover_result = NULL;
1208 ls->ls_lvb_bits = NULL;
1208 1209
1209 error = set_recover_size(sdp, NULL, 0); 1210 error = set_recover_size(sdp, NULL, 0);
1210 if (error) 1211 if (error)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index d1f51fd73f86..5a51265a4341 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -576,7 +576,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
576 RB_CLEAR_NODE(&ip->i_res->rs_node); 576 RB_CLEAR_NODE(&ip->i_res->rs_node);
577out: 577out:
578 up_write(&ip->i_rw_mutex); 578 up_write(&ip->i_rw_mutex);
579 return 0; 579 return error;
580} 580}
581 581
582static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) 582static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
@@ -1181,12 +1181,9 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1181 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) 1181 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
1182{ 1182{
1183 struct super_block *sb = sdp->sd_vfs; 1183 struct super_block *sb = sdp->sd_vfs;
1184 struct block_device *bdev = sb->s_bdev;
1185 const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
1186 bdev_logical_block_size(sb->s_bdev);
1187 u64 blk; 1184 u64 blk;
1188 sector_t start = 0; 1185 sector_t start = 0;
1189 sector_t nr_sects = 0; 1186 sector_t nr_blks = 0;
1190 int rv; 1187 int rv;
1191 unsigned int x; 1188 unsigned int x;
1192 u32 trimmed = 0; 1189 u32 trimmed = 0;
@@ -1206,35 +1203,34 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
1206 if (diff == 0) 1203 if (diff == 0)
1207 continue; 1204 continue;
1208 blk = offset + ((bi->bi_start + x) * GFS2_NBBY); 1205 blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
1209 blk *= sects_per_blk; /* convert to sectors */
1210 while(diff) { 1206 while(diff) {
1211 if (diff & 1) { 1207 if (diff & 1) {
1212 if (nr_sects == 0) 1208 if (nr_blks == 0)
1213 goto start_new_extent; 1209 goto start_new_extent;
1214 if ((start + nr_sects) != blk) { 1210 if ((start + nr_blks) != blk) {
1215 if (nr_sects >= minlen) { 1211 if (nr_blks >= minlen) {
1216 rv = blkdev_issue_discard(bdev, 1212 rv = sb_issue_discard(sb,
1217 start, nr_sects, 1213 start, nr_blks,
1218 GFP_NOFS, 0); 1214 GFP_NOFS, 0);
1219 if (rv) 1215 if (rv)
1220 goto fail; 1216 goto fail;
1221 trimmed += nr_sects; 1217 trimmed += nr_blks;
1222 } 1218 }
1223 nr_sects = 0; 1219 nr_blks = 0;
1224start_new_extent: 1220start_new_extent:
1225 start = blk; 1221 start = blk;
1226 } 1222 }
1227 nr_sects += sects_per_blk; 1223 nr_blks++;
1228 } 1224 }
1229 diff >>= 2; 1225 diff >>= 2;
1230 blk += sects_per_blk; 1226 blk++;
1231 } 1227 }
1232 } 1228 }
1233 if (nr_sects >= minlen) { 1229 if (nr_blks >= minlen) {
1234 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 1230 rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
1235 if (rv) 1231 if (rv)
1236 goto fail; 1232 goto fail;
1237 trimmed += nr_sects; 1233 trimmed += nr_blks;
1238 } 1234 }
1239 if (ptrimmed) 1235 if (ptrimmed)
1240 *ptrimmed = trimmed; 1236 *ptrimmed = trimmed;
diff --git a/fs/internal.h b/fs/internal.h
index 507141fceb99..4be78237d896 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,3 +125,8 @@ extern int invalidate_inodes(struct super_block *, bool);
125 * dcache.c 125 * dcache.c
126 */ 126 */
127extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); 127extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
128
129/*
130 * read_write.c
131 */
132extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ca17d3cb45..d581e45c0a9f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -798,6 +798,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
798 } 798 }
799 799
800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 800 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
801 /* Don't allow unprivileged users to change mount flags */
802 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
803 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
804
801 atomic_inc(&sb->s_active); 805 atomic_inc(&sb->s_active);
802 mnt->mnt.mnt_sb = sb; 806 mnt->mnt.mnt_sb = sb;
803 mnt->mnt.mnt_root = dget(root); 807 mnt->mnt.mnt_root = dget(root);
@@ -1713,6 +1717,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1713 if (readonly_request == __mnt_is_readonly(mnt)) 1717 if (readonly_request == __mnt_is_readonly(mnt))
1714 return 0; 1718 return 0;
1715 1719
1720 if (mnt->mnt_flags & MNT_LOCK_READONLY)
1721 return -EPERM;
1722
1716 if (readonly_request) 1723 if (readonly_request)
1717 error = mnt_make_readonly(real_mount(mnt)); 1724 error = mnt_make_readonly(real_mount(mnt));
1718 else 1725 else
@@ -2339,7 +2346,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2339 /* First pass: copy the tree topology */ 2346 /* First pass: copy the tree topology */
2340 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2347 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2341 if (user_ns != mnt_ns->user_ns) 2348 if (user_ns != mnt_ns->user_ns)
2342 copy_flags |= CL_SHARED_TO_SLAVE; 2349 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2343 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2350 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2344 if (IS_ERR(new)) { 2351 if (IS_ERR(new)) {
2345 up_write(&namespace_sem); 2352 up_write(&namespace_sem);
@@ -2732,6 +2739,51 @@ bool our_mnt(struct vfsmount *mnt)
2732 return check_mnt(real_mount(mnt)); 2739 return check_mnt(real_mount(mnt));
2733} 2740}
2734 2741
2742bool current_chrooted(void)
2743{
2744 /* Does the current process have a non-standard root */
2745 struct path ns_root;
2746 struct path fs_root;
2747 bool chrooted;
2748
2749 /* Find the namespace root */
2750 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
2751 ns_root.dentry = ns_root.mnt->mnt_root;
2752 path_get(&ns_root);
2753 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
2754 ;
2755
2756 get_fs_root(current->fs, &fs_root);
2757
2758 chrooted = !path_equal(&fs_root, &ns_root);
2759
2760 path_put(&fs_root);
2761 path_put(&ns_root);
2762
2763 return chrooted;
2764}
2765
2766void update_mnt_policy(struct user_namespace *userns)
2767{
2768 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
2769 struct mount *mnt;
2770
2771 down_read(&namespace_sem);
2772 list_for_each_entry(mnt, &ns->list, mnt_list) {
2773 switch (mnt->mnt.mnt_sb->s_magic) {
2774 case SYSFS_MAGIC:
2775 userns->may_mount_sysfs = true;
2776 break;
2777 case PROC_SUPER_MAGIC:
2778 userns->may_mount_proc = true;
2779 break;
2780 }
2781 if (userns->may_mount_sysfs && userns->may_mount_proc)
2782 break;
2783 }
2784 up_read(&namespace_sem);
2785}
2786
2735static void *mntns_get(struct task_struct *task) 2787static void *mntns_get(struct task_struct *task)
2736{ 2788{
2737 struct mnt_namespace *ns = NULL; 2789 struct mnt_namespace *ns = NULL;
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c
index 737d839bc17b..6fc7b5cae92b 100644
--- a/fs/nfs/blocklayout/blocklayoutdm.c
+++ b/fs/nfs/blocklayout/blocklayoutdm.c
@@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev)
55 55
56 bl_pipe_msg.bl_wq = &nn->bl_wq; 56 bl_pipe_msg.bl_wq = &nn->bl_wq;
57 memset(msg, 0, sizeof(*msg)); 57 memset(msg, 0, sizeof(*msg));
58 msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); 58 msg->len = sizeof(bl_msg) + bl_msg.totallen;
59 msg->data = kzalloc(msg->len, GFP_NOFS);
59 if (!msg->data) 60 if (!msg->data)
60 goto out; 61 goto out;
61 62
@@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev)
66 memcpy(msg->data, &bl_msg, sizeof(bl_msg)); 67 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
67 dataptr = (uint8_t *) msg->data; 68 dataptr = (uint8_t *) msg->data;
68 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); 69 memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));
69 msg->len = sizeof(bl_msg) + bl_msg.totallen;
70 70
71 add_wait_queue(&nn->bl_wq, &wq); 71 add_wait_queue(&nn->bl_wq, &wq);
72 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { 72 if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dc0f98dfa717..c516da5873fd 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -726,9 +726,9 @@ out1:
726 return ret; 726 return ret;
727} 727}
728 728
729static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) 729static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
730{ 730{
731 return key_instantiate_and_link(key, data, strlen(data) + 1, 731 return key_instantiate_and_link(key, data, datalen,
732 id_resolver_cache->thread_keyring, 732 id_resolver_cache->thread_keyring,
733 authkey); 733 authkey);
734} 734}
@@ -738,6 +738,7 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
738 struct key *key, struct key *authkey) 738 struct key *key, struct key *authkey)
739{ 739{
740 char id_str[NFS_UINT_MAXLEN]; 740 char id_str[NFS_UINT_MAXLEN];
741 size_t len;
741 int ret = -ENOKEY; 742 int ret = -ENOKEY;
742 743
743 /* ret = -ENOKEY */ 744 /* ret = -ENOKEY */
@@ -747,13 +748,15 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
747 case IDMAP_CONV_NAMETOID: 748 case IDMAP_CONV_NAMETOID:
748 if (strcmp(upcall->im_name, im->im_name) != 0) 749 if (strcmp(upcall->im_name, im->im_name) != 0)
749 break; 750 break;
750 sprintf(id_str, "%d", im->im_id); 751 /* Note: here we store the NUL terminator too */
751 ret = nfs_idmap_instantiate(key, authkey, id_str); 752 len = sprintf(id_str, "%d", im->im_id) + 1;
753 ret = nfs_idmap_instantiate(key, authkey, id_str, len);
752 break; 754 break;
753 case IDMAP_CONV_IDTONAME: 755 case IDMAP_CONV_IDTONAME:
754 if (upcall->im_id != im->im_id) 756 if (upcall->im_id != im->im_id)
755 break; 757 break;
756 ret = nfs_idmap_instantiate(key, authkey, im->im_name); 758 len = strlen(im->im_name);
759 ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
757 break; 760 break;
758 default: 761 default:
759 ret = -EINVAL; 762 ret = -EINVAL;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb044c109..4fb234d3aefb 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
129{ 129{
130 if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) 130 if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
131 return; 131 return;
132 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
133 pnfs_return_layout(inode); 132 pnfs_return_layout(inode);
134} 133}
135 134
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb0f901..26431cf62ddb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2632 int status; 2632 int status;
2633 2633
2634 if (pnfs_ld_layoutret_on_setattr(inode)) 2634 if (pnfs_ld_layoutret_on_setattr(inode))
2635 pnfs_return_layout(inode); 2635 pnfs_commit_and_return_layout(inode);
2636 2636
2637 nfs_fattr_init(fattr); 2637 nfs_fattr_init(fattr);
2638 2638
@@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
6416static void nfs4_layoutcommit_release(void *calldata) 6416static void nfs4_layoutcommit_release(void *calldata)
6417{ 6417{
6418 struct nfs4_layoutcommit_data *data = calldata; 6418 struct nfs4_layoutcommit_data *data = calldata;
6419 struct pnfs_layout_segment *lseg, *tmp;
6420 unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
6421 6419
6422 pnfs_cleanup_layoutcommit(data); 6420 pnfs_cleanup_layoutcommit(data);
6423 /* Matched by references in pnfs_set_layoutcommit */
6424 list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
6425 list_del_init(&lseg->pls_lc_list);
6426 if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
6427 &lseg->pls_flags))
6428 pnfs_put_lseg(lseg);
6429 }
6430
6431 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
6432 smp_mb__after_clear_bit();
6433 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
6434
6435 put_rpccred(data->cred); 6421 put_rpccred(data->cred);
6436 kfree(data); 6422 kfree(data);
6437} 6423}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aad6258..4bdffe0ba025 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
417 lo_seg_intersecting(lseg_range, recall_range); 417 lo_seg_intersecting(lseg_range, recall_range);
418} 418}
419 419
420static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
421 struct list_head *tmp_list)
422{
423 if (!atomic_dec_and_test(&lseg->pls_refcount))
424 return false;
425 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
426 list_add(&lseg->pls_list, tmp_list);
427 return true;
428}
429
420/* Returns 1 if lseg is removed from list, 0 otherwise */ 430/* Returns 1 if lseg is removed from list, 0 otherwise */
421static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, 431static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
422 struct list_head *tmp_list) 432 struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
430 */ 440 */
431 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 441 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
432 atomic_read(&lseg->pls_refcount)); 442 atomic_read(&lseg->pls_refcount));
433 if (atomic_dec_and_test(&lseg->pls_refcount)) { 443 if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list))
434 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
435 list_add(&lseg->pls_list, tmp_list);
436 rv = 1; 444 rv = 1;
437 }
438 } 445 }
439 return rv; 446 return rv;
440} 447}
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
777 return lseg; 784 return lseg;
778} 785}
779 786
787static void pnfs_clear_layoutcommit(struct inode *inode,
788 struct list_head *head)
789{
790 struct nfs_inode *nfsi = NFS_I(inode);
791 struct pnfs_layout_segment *lseg, *tmp;
792
793 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
794 return;
795 list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
796 if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
797 continue;
798 pnfs_lseg_dec_and_remove_zero(lseg, head);
799 }
800}
801
780/* 802/*
781 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 803 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
782 * when the layout segment list is empty. 804 * when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
808 /* Reference matched in nfs4_layoutreturn_release */ 830 /* Reference matched in nfs4_layoutreturn_release */
809 pnfs_get_layout_hdr(lo); 831 pnfs_get_layout_hdr(lo);
810 empty = list_empty(&lo->plh_segs); 832 empty = list_empty(&lo->plh_segs);
833 pnfs_clear_layoutcommit(ino, &tmp_list);
811 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); 834 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
812 /* Don't send a LAYOUTRETURN if list was initially empty */ 835 /* Don't send a LAYOUTRETURN if list was initially empty */
813 if (empty) { 836 if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
820 spin_unlock(&ino->i_lock); 843 spin_unlock(&ino->i_lock);
821 pnfs_free_lseg_list(&tmp_list); 844 pnfs_free_lseg_list(&tmp_list);
822 845
823 WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
824
825 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 846 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
826 if (unlikely(lrp == NULL)) { 847 if (unlikely(lrp == NULL)) {
827 status = -ENOMEM; 848 status = -ENOMEM;
@@ -845,6 +866,33 @@ out:
845} 866}
846EXPORT_SYMBOL_GPL(_pnfs_return_layout); 867EXPORT_SYMBOL_GPL(_pnfs_return_layout);
847 868
869int
870pnfs_commit_and_return_layout(struct inode *inode)
871{
872 struct pnfs_layout_hdr *lo;
873 int ret;
874
875 spin_lock(&inode->i_lock);
876 lo = NFS_I(inode)->layout;
877 if (lo == NULL) {
878 spin_unlock(&inode->i_lock);
879 return 0;
880 }
881 pnfs_get_layout_hdr(lo);
882 /* Block new layoutgets and read/write to ds */
883 lo->plh_block_lgets++;
884 spin_unlock(&inode->i_lock);
885 filemap_fdatawait(inode->i_mapping);
886 ret = pnfs_layoutcommit_inode(inode, true);
887 if (ret == 0)
888 ret = _pnfs_return_layout(inode);
889 spin_lock(&inode->i_lock);
890 lo->plh_block_lgets--;
891 spin_unlock(&inode->i_lock);
892 pnfs_put_layout_hdr(lo);
893 return ret;
894}
895
848bool pnfs_roc(struct inode *ino) 896bool pnfs_roc(struct inode *ino)
849{ 897{
850 struct pnfs_layout_hdr *lo; 898 struct pnfs_layout_hdr *lo;
@@ -1458,7 +1506,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
1458 dprintk("pnfs write error = %d\n", hdr->pnfs_error); 1506 dprintk("pnfs write error = %d\n", hdr->pnfs_error);
1459 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1507 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1460 PNFS_LAYOUTRET_ON_ERROR) { 1508 PNFS_LAYOUTRET_ON_ERROR) {
1461 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1462 pnfs_return_layout(hdr->inode); 1509 pnfs_return_layout(hdr->inode);
1463 } 1510 }
1464 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1511 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1660,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1613 dprintk("pnfs read error = %d\n", hdr->pnfs_error); 1660 dprintk("pnfs read error = %d\n", hdr->pnfs_error);
1614 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & 1661 if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
1615 PNFS_LAYOUTRET_ON_ERROR) { 1662 PNFS_LAYOUTRET_ON_ERROR) {
1616 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1617 pnfs_return_layout(hdr->inode); 1663 pnfs_return_layout(hdr->inode);
1618 } 1664 }
1619 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) 1665 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1746,11 +1792,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1746 1792
1747 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { 1793 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
1748 if (lseg->pls_range.iomode == IOMODE_RW && 1794 if (lseg->pls_range.iomode == IOMODE_RW &&
1749 test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) 1795 test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
1750 list_add(&lseg->pls_lc_list, listp); 1796 list_add(&lseg->pls_lc_list, listp);
1751 } 1797 }
1752} 1798}
1753 1799
1800static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
1801{
1802 struct pnfs_layout_segment *lseg, *tmp;
1803 unsigned long *bitlock = &NFS_I(inode)->flags;
1804
1805 /* Matched by references in pnfs_set_layoutcommit */
1806 list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
1807 list_del_init(&lseg->pls_lc_list);
1808 pnfs_put_lseg(lseg);
1809 }
1810
1811 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
1812 smp_mb__after_clear_bit();
1813 wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
1814}
1815
1754void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 1816void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1755{ 1817{
1756 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); 1818 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1857,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
1795 1857
1796 if (nfss->pnfs_curr_ld->cleanup_layoutcommit) 1858 if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
1797 nfss->pnfs_curr_ld->cleanup_layoutcommit(data); 1859 nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
1860 pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
1798} 1861}
1799 1862
1800/* 1863/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba80417748..f5f8a470a647 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
219void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 219void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
220int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 220int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
221int _pnfs_return_layout(struct inode *); 221int _pnfs_return_layout(struct inode *);
222int pnfs_commit_and_return_layout(struct inode *);
222void pnfs_ld_write_done(struct nfs_write_data *); 223void pnfs_ld_write_done(struct nfs_write_data *);
223void pnfs_ld_read_done(struct nfs_read_data *); 224void pnfs_ld_read_done(struct nfs_read_data *);
224struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, 225struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
407 return 0; 408 return 0;
408} 409}
409 410
411static inline int pnfs_commit_and_return_layout(struct inode *inode)
412{
413 return 0;
414}
415
410static inline bool 416static inline bool
411pnfs_ld_layoutret_on_setattr(struct inode *inode) 417pnfs_ld_layoutret_on_setattr(struct inode *inode)
412{ 418{
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 01168865dd37..a2720071f282 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -264,7 +264,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
264 iattr->ia_valid |= ATTR_SIZE; 264 iattr->ia_valid |= ATTR_SIZE;
265 } 265 }
266 if (bmval[0] & FATTR4_WORD0_ACL) { 266 if (bmval[0] & FATTR4_WORD0_ACL) {
267 int nace; 267 u32 nace;
268 struct nfs4_ace *ace; 268 struct nfs4_ace *ace;
269 269
270 READ_BUF(4); len += 4; 270 READ_BUF(4); len += 4;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 62c1ee128aeb..ca05f6dc3544 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -102,7 +102,8 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
102{ 102{
103 if (rp->c_type == RC_REPLBUFF) 103 if (rp->c_type == RC_REPLBUFF)
104 kfree(rp->c_replvec.iov_base); 104 kfree(rp->c_replvec.iov_base);
105 hlist_del(&rp->c_hash); 105 if (!hlist_unhashed(&rp->c_hash))
106 hlist_del(&rp->c_hash);
106 list_del(&rp->c_lru); 107 list_del(&rp->c_lru);
107 --num_drc_entries; 108 --num_drc_entries;
108 kmem_cache_free(drc_slab, rp); 109 kmem_cache_free(drc_slab, rp);
@@ -118,6 +119,10 @@ nfsd_reply_cache_free(struct svc_cacherep *rp)
118 119
119int nfsd_reply_cache_init(void) 120int nfsd_reply_cache_init(void)
120{ 121{
122 INIT_LIST_HEAD(&lru_head);
123 max_drc_entries = nfsd_cache_size_limit();
124 num_drc_entries = 0;
125
121 register_shrinker(&nfsd_reply_cache_shrinker); 126 register_shrinker(&nfsd_reply_cache_shrinker);
122 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 127 drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
123 0, 0, NULL); 128 0, 0, NULL);
@@ -128,10 +133,6 @@ int nfsd_reply_cache_init(void)
128 if (!cache_hash) 133 if (!cache_hash)
129 goto out_nomem; 134 goto out_nomem;
130 135
131 INIT_LIST_HEAD(&lru_head);
132 max_drc_entries = nfsd_cache_size_limit();
133 num_drc_entries = 0;
134
135 return 0; 136 return 0;
136out_nomem: 137out_nomem:
137 printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); 138 printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2a7eb536de0b..2b2e2396a869 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1013,6 +1013,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1013 int host_err; 1013 int host_err;
1014 int stable = *stablep; 1014 int stable = *stablep;
1015 int use_wgather; 1015 int use_wgather;
1016 loff_t pos = offset;
1016 1017
1017 dentry = file->f_path.dentry; 1018 dentry = file->f_path.dentry;
1018 inode = dentry->d_inode; 1019 inode = dentry->d_inode;
@@ -1025,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1025 1026
1026 /* Write the data. */ 1027 /* Write the data. */
1027 oldfs = get_fs(); set_fs(KERNEL_DS); 1028 oldfs = get_fs(); set_fs(KERNEL_DS);
1028 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); 1029 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos);
1029 set_fs(oldfs); 1030 set_fs(oldfs);
1030 if (host_err < 0) 1031 if (host_err < 0)
1031 goto out_nfserr; 1032 goto out_nfserr;
diff --git a/fs/pnode.c b/fs/pnode.c
index 3e000a51ac0d..8b29d2164da6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -9,6 +9,7 @@
9#include <linux/mnt_namespace.h> 9#include <linux/mnt_namespace.h>
10#include <linux/mount.h> 10#include <linux/mount.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/nsproxy.h>
12#include "internal.h" 13#include "internal.h"
13#include "pnode.h" 14#include "pnode.h"
14 15
@@ -220,6 +221,7 @@ static struct mount *get_source(struct mount *dest,
220int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, 221int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
221 struct mount *source_mnt, struct list_head *tree_list) 222 struct mount *source_mnt, struct list_head *tree_list)
222{ 223{
224 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
223 struct mount *m, *child; 225 struct mount *m, *child;
224 int ret = 0; 226 int ret = 0;
225 struct mount *prev_dest_mnt = dest_mnt; 227 struct mount *prev_dest_mnt = dest_mnt;
@@ -237,6 +239,10 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
237 239
238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 240 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
239 241
242 /* Notice when we are propagating across user namespaces */
243 if (m->mnt_ns->user_ns != user_ns)
244 type |= CL_UNPRIVILEGED;
245
240 child = copy_tree(source, source->mnt.mnt_root, type); 246 child = copy_tree(source, source->mnt.mnt_root, type);
241 if (IS_ERR(child)) { 247 if (IS_ERR(child)) {
242 ret = PTR_ERR(child); 248 ret = PTR_ERR(child);
diff --git a/fs/pnode.h b/fs/pnode.h
index 19b853a3445c..a0493d5ebfbf 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -23,6 +23,7 @@
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25#define CL_SHARED_TO_SLAVE 0x20 25#define CL_SHARED_TO_SLAVE 0x20
26#define CL_UNPRIVILEGED 0x40
26 27
27static inline void set_mnt_shared(struct mount *mnt) 28static inline void set_mnt_shared(struct mount *mnt)
28{ 29{
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6e9fac26bac..9c7fab1d23f0 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -16,6 +16,7 @@
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/bitops.h> 18#include <linux/bitops.h>
19#include <linux/user_namespace.h>
19#include <linux/mount.h> 20#include <linux/mount.h>
20#include <linux/pid_namespace.h> 21#include <linux/pid_namespace.h>
21#include <linux/parser.h> 22#include <linux/parser.h>
@@ -108,6 +109,9 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
108 } else { 109 } else {
109 ns = task_active_pid_ns(current); 110 ns = task_active_pid_ns(current);
110 options = data; 111 options = data;
112
113 if (!current_user_ns()->may_mount_proc)
114 return ERR_PTR(-EPERM);
111 } 115 }
112 116
113 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); 117 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
diff --git a/fs/read_write.c b/fs/read_write.c
index a698eff457fb..e6ddc8dceb96 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
17#include <linux/splice.h> 17#include <linux/splice.h>
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include "read_write.h" 19#include "read_write.h"
20#include "internal.h"
20 21
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/unistd.h> 23#include <asm/unistd.h>
@@ -417,6 +418,33 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
417 418
418EXPORT_SYMBOL(do_sync_write); 419EXPORT_SYMBOL(do_sync_write);
419 420
421ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
422{
423 mm_segment_t old_fs;
424 const char __user *p;
425 ssize_t ret;
426
427 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
428 return -EINVAL;
429
430 old_fs = get_fs();
431 set_fs(get_ds());
432 p = (__force const char __user *)buf;
433 if (count > MAX_RW_COUNT)
434 count = MAX_RW_COUNT;
435 if (file->f_op->write)
436 ret = file->f_op->write(file, p, count, pos);
437 else
438 ret = do_sync_write(file, p, count, pos);
439 set_fs(old_fs);
440 if (ret > 0) {
441 fsnotify_modify(file);
442 add_wchar(current, ret);
443 }
444 inc_syscw(current);
445 return ret;
446}
447
420ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 448ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
421{ 449{
422 ssize_t ret; 450 ssize_t ret;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index c196369fe408..4cce1d9552fb 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
187 if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) 187 if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
188 return -ENOSPC; 188 return -ENOSPC;
189 189
190 if (name[0] == '.' && (name[1] == '\0' || 190 if (name[0] == '.' && (namelen < 2 ||
191 (name[1] == '.' && name[2] == '\0'))) 191 (namelen == 2 && name[1] == '.')))
192 return 0; 192 return 0;
193 193
194 dentry = lookup_one_len(name, dbuf->xadir, namelen); 194 dentry = lookup_one_len(name, dbuf->xadir, namelen);
diff --git a/fs/splice.c b/fs/splice.c
index 718bd0056384..29e394e49ddd 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/socket.h> 33#include <linux/socket.h>
34#include "internal.h"
34 35
35/* 36/*
36 * Attempt to steal a page from a pipe buffer. This should perhaps go into 37 * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -1048,9 +1049,10 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1048{ 1049{
1049 int ret; 1050 int ret;
1050 void *data; 1051 void *data;
1052 loff_t tmp = sd->pos;
1051 1053
1052 data = buf->ops->map(pipe, buf, 0); 1054 data = buf->ops->map(pipe, buf, 0);
1053 ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); 1055 ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
1054 buf->ops->unmap(pipe, buf, data); 1056 buf->ops->unmap(pipe, buf, data);
1055 1057
1056 return ret; 1058 return ret;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2fbdff6be25c..e14512678c9b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -1020,6 +1020,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1020 ino = parent_sd->s_ino; 1020 ino = parent_sd->s_ino;
1021 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) 1021 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
1022 filp->f_pos++; 1022 filp->f_pos++;
1023 else
1024 return 0;
1023 } 1025 }
1024 if (filp->f_pos == 1) { 1026 if (filp->f_pos == 1) {
1025 if (parent_sd->s_parent) 1027 if (parent_sd->s_parent)
@@ -1028,6 +1030,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1028 ino = parent_sd->s_ino; 1030 ino = parent_sd->s_ino;
1029 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) 1031 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
1030 filp->f_pos++; 1032 filp->f_pos++;
1033 else
1034 return 0;
1031 } 1035 }
1032 mutex_lock(&sysfs_mutex); 1036 mutex_lock(&sysfs_mutex);
1033 for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); 1037 for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
@@ -1058,10 +1062,21 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
1058 return 0; 1062 return 0;
1059} 1063}
1060 1064
1065static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
1066{
1067 struct inode *inode = file_inode(file);
1068 loff_t ret;
1069
1070 mutex_lock(&inode->i_mutex);
1071 ret = generic_file_llseek(file, offset, whence);
1072 mutex_unlock(&inode->i_mutex);
1073
1074 return ret;
1075}
1061 1076
1062const struct file_operations sysfs_dir_operations = { 1077const struct file_operations sysfs_dir_operations = {
1063 .read = generic_read_dir, 1078 .read = generic_read_dir,
1064 .readdir = sysfs_readdir, 1079 .readdir = sysfs_readdir,
1065 .release = sysfs_dir_release, 1080 .release = sysfs_dir_release,
1066 .llseek = generic_file_llseek, 1081 .llseek = sysfs_dir_llseek,
1067}; 1082};
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8d924b5ec733..afd83273e6ce 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/magic.h> 20#include <linux/magic.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/user_namespace.h>
22 23
23#include "sysfs.h" 24#include "sysfs.h"
24 25
@@ -111,6 +112,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
111 struct super_block *sb; 112 struct super_block *sb;
112 int error; 113 int error;
113 114
115 if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs)
116 return ERR_PTR(-EPERM);
117
114 info = kzalloc(sizeof(*info), GFP_KERNEL); 118 info = kzalloc(sizeof(*info), GFP_KERNEL);
115 if (!info) 119 if (!info)
116 return ERR_PTR(-ENOMEM); 120 return ERR_PTR(-ENOMEM);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index ac838b844936..f21acf0ef01f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1568,6 +1568,12 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1568 c->remounting_rw = 1; 1568 c->remounting_rw = 1;
1569 c->ro_mount = 0; 1569 c->ro_mount = 0;
1570 1570
1571 if (c->space_fixup) {
1572 err = ubifs_fixup_free_space(c);
1573 if (err)
1574 return err;
1575 }
1576
1571 err = check_free_space(c); 1577 err = check_free_space(c);
1572 if (err) 1578 if (err)
1573 goto out; 1579 goto out;
@@ -1684,12 +1690,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1684 err = dbg_check_space_info(c); 1690 err = dbg_check_space_info(c);
1685 } 1691 }
1686 1692
1687 if (c->space_fixup) {
1688 err = ubifs_fixup_free_space(c);
1689 if (err)
1690 goto out;
1691 }
1692
1693 mutex_unlock(&c->umount_mutex); 1693 mutex_unlock(&c->umount_mutex);
1694 return err; 1694 return err;
1695 1695