aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/acl.c9
-rw-r--r--fs/btrfs/ctree.h11
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c129
-rw-r--r--fs/btrfs/extent_io.c84
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/free-space-cache.c132
-rw-r--r--fs/btrfs/inode.c185
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/super.c42
-rw-r--r--fs/btrfs/transaction.c48
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/tree-log.c7
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/btrfs/xattr.c33
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/dcache.c87
-rw-r--r--fs/ecryptfs/crypto.c21
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h7
-rw-r--r--fs/ecryptfs/file.c25
-rw-r--r--fs/ecryptfs/inode.c60
-rw-r--r--fs/ecryptfs/kthread.c6
-rw-r--r--fs/ecryptfs/main.c72
-rw-r--r--fs/ecryptfs/super.c16
-rw-r--r--fs/file.c18
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/gfs2/file.c58
-rw-r--r--fs/gfs2/glock.c6
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/inode.c56
-rw-r--r--fs/gfs2/inode.h3
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/super.c14
-rw-r--r--fs/logfs/super.c8
-rw-r--r--fs/nfs/namespace.c4
-rw-r--r--fs/nfs/nfs4_fs.h1
-rw-r--r--fs/nfs/nfs4proc.c118
-rw-r--r--fs/nfs/nfs4state.c51
-rw-r--r--fs/nfs/nfs4xdr.c53
-rw-r--r--fs/nfs/pnfs.c8
-rw-r--r--fs/nfs/super.c13
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/ubifs/log.c20
-rw-r--r--fs/ubifs/recovery.c26
-rw-r--r--fs/ubifs/replay.c18
-rw-r--r--fs/ubifs/super.c44
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c4
55 files changed, 991 insertions, 596 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
178 178
179 if (value) { 179 if (value) {
180 acl = posix_acl_from_xattr(value, size); 180 acl = posix_acl_from_xattr(value, size);
181 if (acl == NULL) { 181 if (acl) {
182 value = NULL; 182 ret = posix_acl_valid(acl);
183 size = 0; 183 if (ret)
184 goto out;
184 } else if (IS_ERR(acl)) { 185 } else if (IS_ERR(acl)) {
185 return PTR_ERR(acl); 186 return PTR_ERR(acl);
186 } 187 }
187 } 188 }
188 189
189 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type); 190 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
190 191out:
191 posix_acl_release(acl); 192 posix_acl_release(acl);
192 193
193 return ret; 194 return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3458b5725540..8f4b81de3ae2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -718,7 +718,7 @@ struct btrfs_space_info {
718 u64 total_bytes; /* total bytes in the space, 718 u64 total_bytes; /* total bytes in the space,
719 this doesn't take mirrors into account */ 719 this doesn't take mirrors into account */
720 u64 bytes_used; /* total bytes used, 720 u64 bytes_used; /* total bytes used,
721 this does't take mirrors into account */ 721 this doesn't take mirrors into account */
722 u64 bytes_pinned; /* total bytes pinned, will be freed when the 722 u64 bytes_pinned; /* total bytes pinned, will be freed when the
723 transaction finishes */ 723 transaction finishes */
724 u64 bytes_reserved; /* total bytes the allocator has reserved for 724 u64 bytes_reserved; /* total bytes the allocator has reserved for
@@ -740,8 +740,10 @@ struct btrfs_space_info {
740 */ 740 */
741 unsigned long reservation_progress; 741 unsigned long reservation_progress;
742 742
743 int full; /* indicates that we cannot allocate any more 743 int full:1; /* indicates that we cannot allocate any more
744 chunks for this space */ 744 chunks for this space */
745 int chunk_alloc:1; /* set if we are allocating a chunk */
746
745 int force_alloc; /* set if we need to force a chunk alloc for 747 int force_alloc; /* set if we need to force a chunk alloc for
746 this space */ 748 this space */
747 749
@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2576int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2578int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2577 struct inode *inode, u64 start, u64 end); 2579 struct inode *inode, u64 start, u64 end);
2578int btrfs_release_file(struct inode *inode, struct file *file); 2580int btrfs_release_file(struct inode *inode, struct file *file);
2581void btrfs_drop_pages(struct page **pages, size_t num_pages);
2582int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
2583 struct page **pages, size_t num_pages,
2584 loff_t pos, size_t write_bytes,
2585 struct extent_state **cached);
2579 2586
2580/* tree-defrag.c */ 2587/* tree-defrag.c */
2581int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 2588int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8f1d44ba332f..228cf36ece83 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2824,6 +2824,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
2824 2824
2825 spin_lock(&delayed_refs->lock); 2825 spin_lock(&delayed_refs->lock);
2826 if (delayed_refs->num_entries == 0) { 2826 if (delayed_refs->num_entries == 0) {
2827 spin_unlock(&delayed_refs->lock);
2827 printk(KERN_INFO "delayed_refs has NO entry\n"); 2828 printk(KERN_INFO "delayed_refs has NO entry\n");
2828 return ret; 2829 return ret;
2829 } 2830 }
@@ -3057,7 +3058,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3057 btrfs_destroy_pinned_extent(root, 3058 btrfs_destroy_pinned_extent(root,
3058 root->fs_info->pinned_extents); 3059 root->fs_info->pinned_extents);
3059 3060
3060 t->use_count = 0; 3061 atomic_set(&t->use_count, 0);
3061 list_del_init(&t->list); 3062 list_del_init(&t->list);
3062 memset(t, 0, sizeof(*t)); 3063 memset(t, 0, sizeof(*t));
3063 kmem_cache_free(btrfs_transaction_cachep, t); 3064 kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..cd52f7f556ef 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
33#include "locking.h" 33#include "locking.h"
34#include "free-space-cache.h" 34#include "free-space-cache.h"
35 35
36/* control flags for do_chunk_alloc's force field
37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
38 * if we really need one.
39 *
40 * CHUNK_ALLOC_FORCE means it must try to allocate one
41 *
42 * CHUNK_ALLOC_LIMITED means to only try and allocate one
43 * if we have very few chunks already allocated. This is
44 * used as part of the clustering code to help make sure
45 * we have a good pool of storage to cluster in, without
46 * filling the FS with empty chunks
47 *
48 */
49enum {
50 CHUNK_ALLOC_NO_FORCE = 0,
51 CHUNK_ALLOC_FORCE = 1,
52 CHUNK_ALLOC_LIMITED = 2,
53};
54
36static int update_block_group(struct btrfs_trans_handle *trans, 55static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 56 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc); 57 u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3019 found->bytes_readonly = 0; 3038 found->bytes_readonly = 0;
3020 found->bytes_may_use = 0; 3039 found->bytes_may_use = 0;
3021 found->full = 0; 3040 found->full = 0;
3022 found->force_alloc = 0; 3041 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3042 found->chunk_alloc = 0;
3023 *space_info = found; 3043 *space_info = found;
3024 list_add_rcu(&found->list, &info->space_info); 3044 list_add_rcu(&found->list, &info->space_info);
3025 atomic_set(&found->caching_threads, 0); 3045 atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
3150 if (!data_sinfo->full && alloc_chunk) { 3170 if (!data_sinfo->full && alloc_chunk) {
3151 u64 alloc_target; 3171 u64 alloc_target;
3152 3172
3153 data_sinfo->force_alloc = 1; 3173 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3154 spin_unlock(&data_sinfo->lock); 3174 spin_unlock(&data_sinfo->lock);
3155alloc: 3175alloc:
3156 alloc_target = btrfs_get_alloc_profile(root, 1); 3176 alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
3160 3180
3161 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3181 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3162 bytes + 2 * 1024 * 1024, 3182 bytes + 2 * 1024 * 1024,
3163 alloc_target, 0); 3183 alloc_target,
3184 CHUNK_ALLOC_NO_FORCE);
3164 btrfs_end_transaction(trans, root); 3185 btrfs_end_transaction(trans, root);
3165 if (ret < 0) { 3186 if (ret < 0) {
3166 if (ret != -ENOSPC) 3187 if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
3239 rcu_read_lock(); 3260 rcu_read_lock();
3240 list_for_each_entry_rcu(found, head, list) { 3261 list_for_each_entry_rcu(found, head, list) {
3241 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3262 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3242 found->force_alloc = 1; 3263 found->force_alloc = CHUNK_ALLOC_FORCE;
3243 } 3264 }
3244 rcu_read_unlock(); 3265 rcu_read_unlock();
3245} 3266}
3246 3267
3247static int should_alloc_chunk(struct btrfs_root *root, 3268static int should_alloc_chunk(struct btrfs_root *root,
3248 struct btrfs_space_info *sinfo, u64 alloc_bytes) 3269 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3270 int force)
3249{ 3271{
3250 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3272 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3273 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3251 u64 thresh; 3274 u64 thresh;
3252 3275
3253 if (sinfo->bytes_used + sinfo->bytes_reserved + 3276 if (force == CHUNK_ALLOC_FORCE)
3254 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3277 return 1;
3278
3279 /*
3280 * in limited mode, we want to have some free space up to
3281 * about 1% of the FS size.
3282 */
3283 if (force == CHUNK_ALLOC_LIMITED) {
3284 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3285 thresh = max_t(u64, 64 * 1024 * 1024,
3286 div_factor_fine(thresh, 1));
3287
3288 if (num_bytes - num_allocated < thresh)
3289 return 1;
3290 }
3291
3292 /*
3293 * we have two similar checks here, one based on percentage
3294 * and once based on a hard number of 256MB. The idea
3295 * is that if we have a good amount of free
3296 * room, don't allocate a chunk. A good mount is
3297 * less than 80% utilized of the chunks we have allocated,
3298 * or more than 256MB free
3299 */
3300 if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
3255 return 0; 3301 return 0;
3256 3302
3257 if (sinfo->bytes_used + sinfo->bytes_reserved + 3303 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
3258 alloc_bytes < div_factor(num_bytes, 8))
3259 return 0; 3304 return 0;
3260 3305
3261 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3306 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3307
3308 /* 256MB or 5% of the FS */
3262 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3309 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3263 3310
3264 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3311 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3265 return 0; 3312 return 0;
3266
3267 return 1; 3313 return 1;
3268} 3314}
3269 3315
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3273{ 3319{
3274 struct btrfs_space_info *space_info; 3320 struct btrfs_space_info *space_info;
3275 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3321 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3322 int wait_for_alloc = 0;
3276 int ret = 0; 3323 int ret = 0;
3277 3324
3278 mutex_lock(&fs_info->chunk_mutex);
3279
3280 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3325 flags = btrfs_reduce_alloc_profile(extent_root, flags);
3281 3326
3282 space_info = __find_space_info(extent_root->fs_info, flags); 3327 space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3287 } 3332 }
3288 BUG_ON(!space_info); 3333 BUG_ON(!space_info);
3289 3334
3335again:
3290 spin_lock(&space_info->lock); 3336 spin_lock(&space_info->lock);
3291 if (space_info->force_alloc) 3337 if (space_info->force_alloc)
3292 force = 1; 3338 force = space_info->force_alloc;
3293 if (space_info->full) { 3339 if (space_info->full) {
3294 spin_unlock(&space_info->lock); 3340 spin_unlock(&space_info->lock);
3295 goto out; 3341 return 0;
3296 } 3342 }
3297 3343
3298 if (!force && !should_alloc_chunk(extent_root, space_info, 3344 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
3299 alloc_bytes)) {
3300 spin_unlock(&space_info->lock); 3345 spin_unlock(&space_info->lock);
3301 goto out; 3346 return 0;
3347 } else if (space_info->chunk_alloc) {
3348 wait_for_alloc = 1;
3349 } else {
3350 space_info->chunk_alloc = 1;
3302 } 3351 }
3352
3303 spin_unlock(&space_info->lock); 3353 spin_unlock(&space_info->lock);
3304 3354
3355 mutex_lock(&fs_info->chunk_mutex);
3356
3357 /*
3358 * The chunk_mutex is held throughout the entirety of a chunk
3359 * allocation, so once we've acquired the chunk_mutex we know that the
3360 * other guy is done and we need to recheck and see if we should
3361 * allocate.
3362 */
3363 if (wait_for_alloc) {
3364 mutex_unlock(&fs_info->chunk_mutex);
3365 wait_for_alloc = 0;
3366 goto again;
3367 }
3368
3305 /* 3369 /*
3306 * If we have mixed data/metadata chunks we want to make sure we keep 3370 * If we have mixed data/metadata chunks we want to make sure we keep
3307 * allocating mixed chunks instead of individual chunks. 3371 * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3327 space_info->full = 1; 3391 space_info->full = 1;
3328 else 3392 else
3329 ret = 1; 3393 ret = 1;
3330 space_info->force_alloc = 0; 3394
3395 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3396 space_info->chunk_alloc = 0;
3331 spin_unlock(&space_info->lock); 3397 spin_unlock(&space_info->lock);
3332out:
3333 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3398 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3334 return ret; 3399 return ret;
3335} 3400}
@@ -5303,11 +5368,13 @@ loop:
5303 5368
5304 if (allowed_chunk_alloc) { 5369 if (allowed_chunk_alloc) {
5305 ret = do_chunk_alloc(trans, root, num_bytes + 5370 ret = do_chunk_alloc(trans, root, num_bytes +
5306 2 * 1024 * 1024, data, 1); 5371 2 * 1024 * 1024, data,
5372 CHUNK_ALLOC_LIMITED);
5307 allowed_chunk_alloc = 0; 5373 allowed_chunk_alloc = 0;
5308 done_chunk_alloc = 1; 5374 done_chunk_alloc = 1;
5309 } else if (!done_chunk_alloc) { 5375 } else if (!done_chunk_alloc &&
5310 space_info->force_alloc = 1; 5376 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5377 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5311 } 5378 }
5312 5379
5313 if (loop < LOOP_NO_EMPTY_SIZE) { 5380 if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
5393 */ 5460 */
5394 if (empty_size || root->ref_cows) 5461 if (empty_size || root->ref_cows)
5395 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5462 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5396 num_bytes + 2 * 1024 * 1024, data, 0); 5463 num_bytes + 2 * 1024 * 1024, data,
5464 CHUNK_ALLOC_NO_FORCE);
5397 5465
5398 WARN_ON(num_bytes < root->sectorsize); 5466 WARN_ON(num_bytes < root->sectorsize);
5399 ret = find_free_extent(trans, root, num_bytes, empty_size, 5467 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
5405 num_bytes = num_bytes & ~(root->sectorsize - 1); 5473 num_bytes = num_bytes & ~(root->sectorsize - 1);
5406 num_bytes = max(num_bytes, min_alloc_size); 5474 num_bytes = max(num_bytes, min_alloc_size);
5407 do_chunk_alloc(trans, root->fs_info->extent_root, 5475 do_chunk_alloc(trans, root->fs_info->extent_root,
5408 num_bytes, data, 1); 5476 num_bytes, data, CHUNK_ALLOC_FORCE);
5409 goto again; 5477 goto again;
5410 } 5478 }
5411 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5479 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -7991,6 +8059,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7991 u64 group_start = group->key.objectid; 8059 u64 group_start = group->key.objectid;
7992 new_extents = kmalloc(sizeof(*new_extents), 8060 new_extents = kmalloc(sizeof(*new_extents),
7993 GFP_NOFS); 8061 GFP_NOFS);
8062 if (!new_extents) {
8063 ret = -ENOMEM;
8064 goto out;
8065 }
7994 nr_extents = 1; 8066 nr_extents = 1;
7995 ret = get_new_locations(reloc_inode, 8067 ret = get_new_locations(reloc_inode,
7996 extent_key, 8068 extent_key,
@@ -8109,13 +8181,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8109 8181
8110 alloc_flags = update_block_group_flags(root, cache->flags); 8182 alloc_flags = update_block_group_flags(root, cache->flags);
8111 if (alloc_flags != cache->flags) 8183 if (alloc_flags != cache->flags)
8112 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8184 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8185 CHUNK_ALLOC_FORCE);
8113 8186
8114 ret = set_block_group_ro(cache); 8187 ret = set_block_group_ro(cache);
8115 if (!ret) 8188 if (!ret)
8116 goto out; 8189 goto out;
8117 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 8190 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8118 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8191 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8192 CHUNK_ALLOC_FORCE);
8119 if (ret < 0) 8193 if (ret < 0)
8120 goto out; 8194 goto out;
8121 ret = set_block_group_ro(cache); 8195 ret = set_block_group_ro(cache);
@@ -8128,7 +8202,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8128 struct btrfs_root *root, u64 type) 8202 struct btrfs_root *root, u64 type)
8129{ 8203{
8130 u64 alloc_flags = get_alloc_profile(root, type); 8204 u64 alloc_flags = get_alloc_profile(root, type);
8131 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8205 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8206 CHUNK_ALLOC_FORCE);
8132} 8207}
8133 8208
8134/* 8209/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..ba41da59e31b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
690 } 690 }
691} 691}
692 692
693static void uncache_state(struct extent_state **cached_ptr)
694{
695 if (cached_ptr && (*cached_ptr)) {
696 struct extent_state *state = *cached_ptr;
697 *cached_ptr = NULL;
698 free_extent_state(state);
699 }
700}
701
693/* 702/*
694 * set some bits on a range in the tree. This may require allocations or 703 * set some bits on a range in the tree. This may require allocations or
695 * sleeping, so the gfp mask is used to indicate what is allowed. 704 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
940} 949}
941 950
942int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 951int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
943 gfp_t mask) 952 struct extent_state **cached_state, gfp_t mask)
944{ 953{
945 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 954 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
946 NULL, mask); 955 NULL, cached_state, mask);
947} 956}
948 957
949static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 958static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1012 mask); 1021 mask);
1013} 1022}
1014 1023
1015int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1024int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1016 gfp_t mask)
1017{ 1025{
1018 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1026 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1019 mask); 1027 mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1735 1743
1736 do { 1744 do {
1737 struct page *page = bvec->bv_page; 1745 struct page *page = bvec->bv_page;
1746 struct extent_state *cached = NULL;
1747 struct extent_state *state;
1748
1738 tree = &BTRFS_I(page->mapping->host)->io_tree; 1749 tree = &BTRFS_I(page->mapping->host)->io_tree;
1739 1750
1740 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1751 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1749 if (++bvec <= bvec_end) 1760 if (++bvec <= bvec_end)
1750 prefetchw(&bvec->bv_page->flags); 1761 prefetchw(&bvec->bv_page->flags);
1751 1762
1763 spin_lock(&tree->lock);
1764 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
1765 if (state && state->start == start) {
1766 /*
1767 * take a reference on the state, unlock will drop
1768 * the ref
1769 */
1770 cache_state(state, &cached);
1771 }
1772 spin_unlock(&tree->lock);
1773
1752 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1774 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1753 ret = tree->ops->readpage_end_io_hook(page, start, end, 1775 ret = tree->ops->readpage_end_io_hook(page, start, end,
1754 NULL); 1776 state);
1755 if (ret) 1777 if (ret)
1756 uptodate = 0; 1778 uptodate = 0;
1757 } 1779 }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1764 test_bit(BIO_UPTODATE, &bio->bi_flags); 1786 test_bit(BIO_UPTODATE, &bio->bi_flags);
1765 if (err) 1787 if (err)
1766 uptodate = 0; 1788 uptodate = 0;
1789 uncache_state(&cached);
1767 continue; 1790 continue;
1768 } 1791 }
1769 } 1792 }
1770 1793
1771 if (uptodate) { 1794 if (uptodate) {
1772 set_extent_uptodate(tree, start, end, 1795 set_extent_uptodate(tree, start, end, &cached,
1773 GFP_ATOMIC); 1796 GFP_ATOMIC);
1774 } 1797 }
1775 unlock_extent(tree, start, end, GFP_ATOMIC); 1798 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1776 1799
1777 if (whole_page) { 1800 if (whole_page) {
1778 if (uptodate) { 1801 if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1811 1834
1812 do { 1835 do {
1813 struct page *page = bvec->bv_page; 1836 struct page *page = bvec->bv_page;
1837 struct extent_state *cached = NULL;
1814 tree = &BTRFS_I(page->mapping->host)->io_tree; 1838 tree = &BTRFS_I(page->mapping->host)->io_tree;
1815 1839
1816 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1840 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1821 prefetchw(&bvec->bv_page->flags); 1845 prefetchw(&bvec->bv_page->flags);
1822 1846
1823 if (uptodate) { 1847 if (uptodate) {
1824 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1848 set_extent_uptodate(tree, start, end, &cached,
1849 GFP_ATOMIC);
1825 } else { 1850 } else {
1826 ClearPageUptodate(page); 1851 ClearPageUptodate(page);
1827 SetPageError(page); 1852 SetPageError(page);
1828 } 1853 }
1829 1854
1830 unlock_extent(tree, start, end, GFP_ATOMIC); 1855 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1831 1856
1832 } while (bvec >= bio->bi_io_vec); 1857 } while (bvec >= bio->bi_io_vec);
1833 1858
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2016 while (cur <= end) { 2041 while (cur <= end) {
2017 if (cur >= last_byte) { 2042 if (cur >= last_byte) {
2018 char *userpage; 2043 char *userpage;
2044 struct extent_state *cached = NULL;
2045
2019 iosize = PAGE_CACHE_SIZE - page_offset; 2046 iosize = PAGE_CACHE_SIZE - page_offset;
2020 userpage = kmap_atomic(page, KM_USER0); 2047 userpage = kmap_atomic(page, KM_USER0);
2021 memset(userpage + page_offset, 0, iosize); 2048 memset(userpage + page_offset, 0, iosize);
2022 flush_dcache_page(page); 2049 flush_dcache_page(page);
2023 kunmap_atomic(userpage, KM_USER0); 2050 kunmap_atomic(userpage, KM_USER0);
2024 set_extent_uptodate(tree, cur, cur + iosize - 1, 2051 set_extent_uptodate(tree, cur, cur + iosize - 1,
2025 GFP_NOFS); 2052 &cached, GFP_NOFS);
2026 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2053 unlock_extent_cached(tree, cur, cur + iosize - 1,
2054 &cached, GFP_NOFS);
2027 break; 2055 break;
2028 } 2056 }
2029 em = get_extent(inode, page, page_offset, cur, 2057 em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2063 /* we've found a hole, just zero and go on */ 2091 /* we've found a hole, just zero and go on */
2064 if (block_start == EXTENT_MAP_HOLE) { 2092 if (block_start == EXTENT_MAP_HOLE) {
2065 char *userpage; 2093 char *userpage;
2094 struct extent_state *cached = NULL;
2095
2066 userpage = kmap_atomic(page, KM_USER0); 2096 userpage = kmap_atomic(page, KM_USER0);
2067 memset(userpage + page_offset, 0, iosize); 2097 memset(userpage + page_offset, 0, iosize);
2068 flush_dcache_page(page); 2098 flush_dcache_page(page);
2069 kunmap_atomic(userpage, KM_USER0); 2099 kunmap_atomic(userpage, KM_USER0);
2070 2100
2071 set_extent_uptodate(tree, cur, cur + iosize - 1, 2101 set_extent_uptodate(tree, cur, cur + iosize - 1,
2072 GFP_NOFS); 2102 &cached, GFP_NOFS);
2073 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2103 unlock_extent_cached(tree, cur, cur + iosize - 1,
2104 &cached, GFP_NOFS);
2074 cur = cur + iosize; 2105 cur = cur + iosize;
2075 page_offset += iosize; 2106 page_offset += iosize;
2076 continue; 2107 continue;
@@ -2650,7 +2681,7 @@ int extent_readpages(struct extent_io_tree *tree,
2650 prefetchw(&page->flags); 2681 prefetchw(&page->flags);
2651 list_del(&page->lru); 2682 list_del(&page->lru);
2652 if (!add_to_page_cache_lru(page, mapping, 2683 if (!add_to_page_cache_lru(page, mapping,
2653 page->index, GFP_KERNEL)) { 2684 page->index, GFP_NOFS)) {
2654 __extent_read_full_page(tree, page, get_extent, 2685 __extent_read_full_page(tree, page, get_extent,
2655 &bio, 0, &bio_flags); 2686 &bio, 0, &bio_flags);
2656 } 2687 }
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
2789 iocount++; 2820 iocount++;
2790 block_start = block_start + iosize; 2821 block_start = block_start + iosize;
2791 } else { 2822 } else {
2792 set_extent_uptodate(tree, block_start, cur_end, 2823 struct extent_state *cached = NULL;
2824
2825 set_extent_uptodate(tree, block_start, cur_end, &cached,
2793 GFP_NOFS); 2826 GFP_NOFS);
2794 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2827 unlock_extent_cached(tree, block_start, cur_end,
2828 &cached, GFP_NOFS);
2795 block_start = cur_end + 1; 2829 block_start = cur_end + 1;
2796 } 2830 }
2797 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2831 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3457 num_pages = num_extent_pages(eb->start, eb->len); 3491 num_pages = num_extent_pages(eb->start, eb->len);
3458 3492
3459 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3493 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3460 GFP_NOFS); 3494 NULL, GFP_NOFS);
3461 for (i = 0; i < num_pages; i++) { 3495 for (i = 0; i < num_pages; i++) {
3462 page = extent_buffer_page(eb, i); 3496 page = extent_buffer_page(eb, i);
3463 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3497 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
3885 kunmap_atomic(dst_kaddr, KM_USER0); 3919 kunmap_atomic(dst_kaddr, KM_USER0);
3886} 3920}
3887 3921
3922static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
3923{
3924 unsigned long distance = (src > dst) ? src - dst : dst - src;
3925 return distance < len;
3926}
3927
3888static void copy_pages(struct page *dst_page, struct page *src_page, 3928static void copy_pages(struct page *dst_page, struct page *src_page,
3889 unsigned long dst_off, unsigned long src_off, 3929 unsigned long dst_off, unsigned long src_off,
3890 unsigned long len) 3930 unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
3892 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3932 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3893 char *src_kaddr; 3933 char *src_kaddr;
3894 3934
3895 if (dst_page != src_page) 3935 if (dst_page != src_page) {
3896 src_kaddr = kmap_atomic(src_page, KM_USER1); 3936 src_kaddr = kmap_atomic(src_page, KM_USER1);
3897 else 3937 } else {
3898 src_kaddr = dst_kaddr; 3938 src_kaddr = dst_kaddr;
3939 BUG_ON(areas_overlap(src_off, dst_off, len));
3940 }
3899 3941
3900 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3942 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3901 kunmap_atomic(dst_kaddr, KM_USER0); 3943 kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3970 "len %lu len %lu\n", dst_offset, len, dst->len); 4012 "len %lu len %lu\n", dst_offset, len, dst->len);
3971 BUG_ON(1); 4013 BUG_ON(1);
3972 } 4014 }
3973 if (dst_offset < src_offset) { 4015 if (!areas_overlap(src_offset, dst_offset, len)) {
3974 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4016 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3975 return; 4017 return;
3976 } 4018 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
208 int bits, int exclusive_bits, u64 *failed_start, 208 int bits, int exclusive_bits, u64 *failed_start,
209 struct extent_state **cached_state, gfp_t mask); 209 struct extent_state **cached_state, gfp_t mask);
210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
211 gfp_t mask); 211 struct extent_state **cached_state, gfp_t mask);
212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
213 gfp_t mask); 213 gfp_t mask);
214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e621ea54a3fd..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
104/* 104/*
105 * unlocks pages after btrfs_file_write is done with them 105 * unlocks pages after btrfs_file_write is done with them
106 */ 106 */
107static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) 107void btrfs_drop_pages(struct page **pages, size_t num_pages)
108{ 108{
109 size_t i; 109 size_t i;
110 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
127 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
128 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
129 */ 129 */
130static noinline int dirty_and_release_pages(struct btrfs_root *root, 130int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
131 struct file *file, 131 struct page **pages, size_t num_pages,
132 struct page **pages, 132 loff_t pos, size_t write_bytes,
133 size_t num_pages, 133 struct extent_state **cached)
134 loff_t pos,
135 size_t write_bytes)
136{ 134{
137 int err = 0; 135 int err = 0;
138 int i; 136 int i;
139 struct inode *inode = fdentry(file)->d_inode;
140 u64 num_bytes; 137 u64 num_bytes;
141 u64 start_pos; 138 u64 start_pos;
142 u64 end_of_last_block; 139 u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
149 146
150 end_of_last_block = start_pos + num_bytes - 1; 147 end_of_last_block = start_pos + num_bytes - 1;
151 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 148 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
152 NULL); 149 cached);
153 if (err) 150 if (err)
154 return err; 151 return err;
155 152
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
992 } 989 }
993 990
994 if (copied > 0) { 991 if (copied > 0) {
995 ret = dirty_and_release_pages(root, file, pages, 992 ret = btrfs_dirty_pages(root, inode, pages,
996 dirty_pages, pos, 993 dirty_pages, pos, copied,
997 copied); 994 NULL);
998 if (ret) { 995 if (ret) {
999 btrfs_delalloc_release_space(inode, 996 btrfs_delalloc_release_space(inode,
1000 dirty_pages << PAGE_CACHE_SHIFT); 997 dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f561c953205b..63731a1fb0a1 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
508 struct inode *inode; 508 struct inode *inode;
509 struct rb_node *node; 509 struct rb_node *node;
510 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
511 struct page *page; 512 struct page *page;
512 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
513 struct btrfs_free_cluster *cluster = NULL; 514 struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
517 u64 start, end, len; 518 u64 start, end, len;
518 u64 bytes = 0; 519 u64 bytes = 0;
519 u32 *crc, *checksums; 520 u32 *crc, *checksums;
520 pgoff_t index = 0, last_index = 0;
521 unsigned long first_page_offset; 521 unsigned long first_page_offset;
522 int num_checksums; 522 int index = 0, num_pages = 0;
523 int entries = 0; 523 int entries = 0;
524 int bitmaps = 0; 524 int bitmaps = 0;
525 int ret = 0; 525 int ret = 0;
526 bool next_page = false; 526 bool next_page = false;
527 bool out_of_space = false;
527 528
528 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
529 530
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
551 return 0; 552 return 0;
552 } 553 }
553 554
554 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
555 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
556 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
557 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
558 560
559 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
560 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
561 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
562 if (!crc) { 563 if (!crc) {
563 iput(inode); 564 iput(inode);
564 return 0; 565 return 0;
565 } 566 }
566 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
567 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
568 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
569 * our entries. 577 * our entries.
570 */ 578 */
571 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
572 580
573 /* Get the cluster for this block_group if it exists */ 581 /* Get the cluster for this block_group if it exists */
574 if (!list_empty(&block_group->cluster_list)) 582 if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
590 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
591 * know and don't freak out. 599 * know and don't freak out.
592 */ 600 */
593 while (index <= last_index) { 601 while (index < num_pages) {
594 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
595 if (!page) { 603 if (!page) {
596 pgoff_t i = 0; 604 int i;
597 605
598 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
599 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
600 unlock_page(page); 608 page_cache_release(pages[i]);
601 page_cache_release(page);
602 page_cache_release(page);
603 i++;
604 } 609 }
605 goto out_free; 610 goto out_free;
606 } 611 }
612 pages[index] = page;
607 index++; 613 index++;
608 } 614 }
609 615
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
631 offset = start_offset; 637 offset = start_offset;
632 } 638 }
633 639
634 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
635 646
636 addr = kmap(page); 647 addr = kmap(page);
637 entry = addr + start_offset; 648 entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
708 719
709 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
710 721
711 ClearPageChecked(page);
712 set_page_extent_mapped(page);
713 SetPageUptodate(page);
714 set_page_dirty(page);
715
716 /*
717 * We need to release our reference we got for grab_cache_page,
718 * except for the first page which will hold our checksums, we
719 * do that below.
720 */
721 if (index != 0) {
722 unlock_page(page);
723 page_cache_release(page);
724 }
725
726 page_cache_release(page);
727
728 index++; 722 index++;
729 } while (node || next_page); 723 } while (node || next_page);
730 724
@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
734 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
735 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
736 730
737 page = find_get_page(inode->i_mapping, index); 731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
735 page = pages[index];
738 736
739 addr = kmap(page); 737 addr = kmap(page);
740 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 738 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
745 crc++; 743 crc++;
746 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
747 745
748 ClearPageChecked(page);
749 set_page_extent_mapped(page);
750 SetPageUptodate(page);
751 set_page_dirty(page);
752 unlock_page(page);
753 page_cache_release(page);
754 page_cache_release(page);
755 list_del_init(&entry->list); 746 list_del_init(&entry->list);
756 index++; 747 index++;
757 } 748 }
758 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
759 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
760 while (index <= last_index) { 760 while (index < num_pages) {
761 void *addr; 761 void *addr;
762 762
763 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
764
765 addr = kmap(page); 764 addr = kmap(page);
766 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
767 kunmap(page); 766 kunmap(page);
768 ClearPageChecked(page);
769 set_page_extent_mapped(page);
770 SetPageUptodate(page);
771 set_page_dirty(page);
772 unlock_page(page);
773 page_cache_release(page);
774 page_cache_release(page);
775 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
776 index++; 768 index++;
777 } 769 }
778 770
779 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
780
781 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
782 { 772 {
783 void *addr; 773 void *addr;
784 u64 *gen; 774 u64 *gen;
785 775
786 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
787 777
788 addr = kmap(page); 778 addr = kmap(page);
789 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
790 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
791 *gen = trans->transid; 781 *gen = trans->transid;
792 kunmap(page); 782 kunmap(page);
793 ClearPageChecked(page);
794 set_page_extent_mapped(page);
795 SetPageUptodate(page);
796 set_page_dirty(page);
797 unlock_page(page);
798 page_cache_release(page);
799 page_cache_release(page);
800 } 783 }
801 BTRFS_I(inode)->generation = trans->transid;
802 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
803 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
804 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
805 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
806 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
807 799
808 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
853 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
854 } 846 }
855 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
856 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
857 iput(inode); 850 iput(inode);
858 return ret; 851 return ret;
@@ -1775,10 +1768,13 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1775 1768
1776 while ((node = rb_last(&block_group->free_space_offset)) != NULL) { 1769 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1777 info = rb_entry(node, struct btrfs_free_space, offset_index); 1770 info = rb_entry(node, struct btrfs_free_space, offset_index);
1778 unlink_free_space(block_group, info); 1771 if (!info->bitmap) {
1779 if (info->bitmap) 1772 unlink_free_space(block_group, info);
1780 kfree(info->bitmap); 1773 kmem_cache_free(btrfs_free_space_cachep, info);
1781 kmem_cache_free(btrfs_free_space_cachep, info); 1774 } else {
1775 free_bitmap(block_group, info);
1776 }
1777
1782 if (need_resched()) { 1778 if (need_resched()) {
1783 spin_unlock(&block_group->tree_lock); 1779 spin_unlock(&block_group->tree_lock);
1784 cond_resched(); 1780 cond_resched();
@@ -2308,7 +2304,7 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2308 start = entry->offset; 2304 start = entry->offset;
2309 bytes = min(entry->bytes, end - start); 2305 bytes = min(entry->bytes, end - start);
2310 unlink_free_space(block_group, entry); 2306 unlink_free_space(block_group, entry);
2311 kfree(entry); 2307 kmem_cache_free(btrfs_free_space_cachep, entry);
2312 } 2308 }
2313 2309
2314 spin_unlock(&block_group->tree_lock); 2310 spin_unlock(&block_group->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5cc64ab9c485..7cd8ab0ef04d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -954,6 +954,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
954 1, 0, NULL, GFP_NOFS); 954 1, 0, NULL, GFP_NOFS);
955 while (start < end) { 955 while (start < end) {
956 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 956 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
957 BUG_ON(!async_cow);
957 async_cow->inode = inode; 958 async_cow->inode = inode;
958 async_cow->root = root; 959 async_cow->root = root;
959 async_cow->locked_page = locked_page; 960 async_cow->locked_page = locked_page;
@@ -1770,9 +1771,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1770 add_pending_csums(trans, inode, ordered_extent->file_offset, 1771 add_pending_csums(trans, inode, ordered_extent->file_offset,
1771 &ordered_extent->list); 1772 &ordered_extent->list);
1772 1773
1773 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1774 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1774 ret = btrfs_update_inode(trans, root, inode); 1775 if (!ret) {
1775 BUG_ON(ret); 1776 ret = btrfs_update_inode(trans, root, inode);
1777 BUG_ON(ret);
1778 }
1779 ret = 0;
1776out: 1780out:
1777 if (nolock) { 1781 if (nolock) {
1778 if (trans) 1782 if (trans)
@@ -2590,6 +2594,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2590 struct btrfs_inode_item *item, 2594 struct btrfs_inode_item *item,
2591 struct inode *inode) 2595 struct inode *inode)
2592{ 2596{
2597 if (!leaf->map_token)
2598 map_private_extent_buffer(leaf, (unsigned long)item,
2599 sizeof(struct btrfs_inode_item),
2600 &leaf->map_token, &leaf->kaddr,
2601 &leaf->map_start, &leaf->map_len,
2602 KM_USER1);
2603
2593 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2604 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2594 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2605 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2595 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2606 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2618,6 +2629,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2618 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2629 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2619 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2630 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2620 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2631 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2632
2633 if (leaf->map_token) {
2634 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2635 leaf->map_token = NULL;
2636 }
2621} 2637}
2622 2638
2623/* 2639/*
@@ -4207,10 +4223,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4207 struct btrfs_key found_key; 4223 struct btrfs_key found_key;
4208 struct btrfs_path *path; 4224 struct btrfs_path *path;
4209 int ret; 4225 int ret;
4210 u32 nritems;
4211 struct extent_buffer *leaf; 4226 struct extent_buffer *leaf;
4212 int slot; 4227 int slot;
4213 int advance;
4214 unsigned char d_type; 4228 unsigned char d_type;
4215 int over = 0; 4229 int over = 0;
4216 u32 di_cur; 4230 u32 di_cur;
@@ -4253,27 +4267,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4253 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4267 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4254 if (ret < 0) 4268 if (ret < 0)
4255 goto err; 4269 goto err;
4256 advance = 0;
4257 4270
4258 while (1) { 4271 while (1) {
4259 leaf = path->nodes[0]; 4272 leaf = path->nodes[0];
4260 nritems = btrfs_header_nritems(leaf);
4261 slot = path->slots[0]; 4273 slot = path->slots[0];
4262 if (advance || slot >= nritems) { 4274 if (slot >= btrfs_header_nritems(leaf)) {
4263 if (slot >= nritems - 1) { 4275 ret = btrfs_next_leaf(root, path);
4264 ret = btrfs_next_leaf(root, path); 4276 if (ret < 0)
4265 if (ret) 4277 goto err;
4266 break; 4278 else if (ret > 0)
4267 leaf = path->nodes[0]; 4279 break;
4268 nritems = btrfs_header_nritems(leaf); 4280 continue;
4269 slot = path->slots[0];
4270 } else {
4271 slot++;
4272 path->slots[0]++;
4273 }
4274 } 4281 }
4275 4282
4276 advance = 1;
4277 item = btrfs_item_nr(leaf, slot); 4283 item = btrfs_item_nr(leaf, slot);
4278 btrfs_item_key_to_cpu(leaf, &found_key, slot); 4284 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4279 4285
@@ -4282,7 +4288,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4282 if (btrfs_key_type(&found_key) != key_type) 4288 if (btrfs_key_type(&found_key) != key_type)
4283 break; 4289 break;
4284 if (found_key.offset < filp->f_pos) 4290 if (found_key.offset < filp->f_pos)
4285 continue; 4291 goto next;
4286 4292
4287 filp->f_pos = found_key.offset; 4293 filp->f_pos = found_key.offset;
4288 4294
@@ -4335,6 +4341,8 @@ skip:
4335 di_cur += di_len; 4341 di_cur += di_len;
4336 di = (struct btrfs_dir_item *)((char *)di + di_len); 4342 di = (struct btrfs_dir_item *)((char *)di + di_len);
4337 } 4343 }
4344next:
4345 path->slots[0]++;
4338 } 4346 }
4339 4347
4340 /* Reached end of directory/root. Bump pos past the last item. */ 4348 /* Reached end of directory/root. Bump pos past the last item. */
@@ -4527,14 +4535,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4527 BUG_ON(!path); 4535 BUG_ON(!path);
4528 4536
4529 inode = new_inode(root->fs_info->sb); 4537 inode = new_inode(root->fs_info->sb);
4530 if (!inode) 4538 if (!inode) {
4539 btrfs_free_path(path);
4531 return ERR_PTR(-ENOMEM); 4540 return ERR_PTR(-ENOMEM);
4541 }
4532 4542
4533 if (dir) { 4543 if (dir) {
4534 trace_btrfs_inode_request(dir); 4544 trace_btrfs_inode_request(dir);
4535 4545
4536 ret = btrfs_set_inode_index(dir, index); 4546 ret = btrfs_set_inode_index(dir, index);
4537 if (ret) { 4547 if (ret) {
4548 btrfs_free_path(path);
4538 iput(inode); 4549 iput(inode);
4539 return ERR_PTR(ret); 4550 return ERR_PTR(ret);
4540 } 4551 }
@@ -4721,9 +4732,10 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4721 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4722 dentry->d_name.len, dir->i_ino, objectid, 4733 dentry->d_name.len, dir->i_ino, objectid,
4723 BTRFS_I(dir)->block_group, mode, &index); 4734 BTRFS_I(dir)->block_group, mode, &index);
4724 err = PTR_ERR(inode); 4735 if (IS_ERR(inode)) {
4725 if (IS_ERR(inode)) 4736 err = PTR_ERR(inode);
4726 goto out_unlock; 4737 goto out_unlock;
4738 }
4727 4739
4728 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 4740 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4729 if (err) { 4741 if (err) {
@@ -4782,9 +4794,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4782 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4794 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4783 dentry->d_name.len, dir->i_ino, objectid, 4795 dentry->d_name.len, dir->i_ino, objectid,
4784 BTRFS_I(dir)->block_group, mode, &index); 4796 BTRFS_I(dir)->block_group, mode, &index);
4785 err = PTR_ERR(inode); 4797 if (IS_ERR(inode)) {
4786 if (IS_ERR(inode)) 4798 err = PTR_ERR(inode);
4787 goto out_unlock; 4799 goto out_unlock;
4800 }
4788 4801
4789 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 4802 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4790 if (err) { 4803 if (err) {
@@ -4834,9 +4847,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4834 if (inode->i_nlink == ~0U) 4847 if (inode->i_nlink == ~0U)
4835 return -EMLINK; 4848 return -EMLINK;
4836 4849
4837 btrfs_inc_nlink(inode);
4838 inode->i_ctime = CURRENT_TIME;
4839
4840 err = btrfs_set_inode_index(dir, &index); 4850 err = btrfs_set_inode_index(dir, &index);
4841 if (err) 4851 if (err)
4842 goto fail; 4852 goto fail;
@@ -4852,6 +4862,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4852 goto fail; 4862 goto fail;
4853 } 4863 }
4854 4864
4865 btrfs_inc_nlink(inode);
4866 inode->i_ctime = CURRENT_TIME;
4867
4855 btrfs_set_trans_block_group(trans, dir); 4868 btrfs_set_trans_block_group(trans, dir);
4856 ihold(inode); 4869 ihold(inode);
4857 4870
@@ -4989,6 +5002,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
4989 inline_size = btrfs_file_extent_inline_item_len(leaf, 5002 inline_size = btrfs_file_extent_inline_item_len(leaf,
4990 btrfs_item_nr(leaf, path->slots[0])); 5003 btrfs_item_nr(leaf, path->slots[0]));
4991 tmp = kmalloc(inline_size, GFP_NOFS); 5004 tmp = kmalloc(inline_size, GFP_NOFS);
5005 if (!tmp)
5006 return -ENOMEM;
4992 ptr = btrfs_file_extent_inline_start(item); 5007 ptr = btrfs_file_extent_inline_start(item);
4993 5008
4994 read_extent_buffer(leaf, tmp, ptr, inline_size); 5009 read_extent_buffer(leaf, tmp, ptr, inline_size);
@@ -5221,7 +5236,7 @@ again:
5221 btrfs_mark_buffer_dirty(leaf); 5236 btrfs_mark_buffer_dirty(leaf);
5222 } 5237 }
5223 set_extent_uptodate(io_tree, em->start, 5238 set_extent_uptodate(io_tree, em->start,
5224 extent_map_end(em) - 1, GFP_NOFS); 5239 extent_map_end(em) - 1, NULL, GFP_NOFS);
5225 goto insert; 5240 goto insert;
5226 } else { 5241 } else {
5227 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); 5242 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5428,17 +5443,30 @@ out:
5428} 5443}
5429 5444
5430static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5445static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5446 struct extent_map *em,
5431 u64 start, u64 len) 5447 u64 start, u64 len)
5432{ 5448{
5433 struct btrfs_root *root = BTRFS_I(inode)->root; 5449 struct btrfs_root *root = BTRFS_I(inode)->root;
5434 struct btrfs_trans_handle *trans; 5450 struct btrfs_trans_handle *trans;
5435 struct extent_map *em;
5436 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5451 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5437 struct btrfs_key ins; 5452 struct btrfs_key ins;
5438 u64 alloc_hint; 5453 u64 alloc_hint;
5439 int ret; 5454 int ret;
5455 bool insert = false;
5440 5456
5441 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5457 /*
5458 * Ok if the extent map we looked up is a hole and is for the exact
5459 * range we want, there is no reason to allocate a new one, however if
5460 * it is not right then we need to free this one and drop the cache for
5461 * our range.
5462 */
5463 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5464 em->len != len) {
5465 free_extent_map(em);
5466 em = NULL;
5467 insert = true;
5468 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5469 }
5442 5470
5443 trans = btrfs_join_transaction(root, 0); 5471 trans = btrfs_join_transaction(root, 0);
5444 if (IS_ERR(trans)) 5472 if (IS_ERR(trans))
@@ -5454,10 +5482,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5454 goto out; 5482 goto out;
5455 } 5483 }
5456 5484
5457 em = alloc_extent_map(GFP_NOFS);
5458 if (!em) { 5485 if (!em) {
5459 em = ERR_PTR(-ENOMEM); 5486 em = alloc_extent_map(GFP_NOFS);
5460 goto out; 5487 if (!em) {
5488 em = ERR_PTR(-ENOMEM);
5489 goto out;
5490 }
5461 } 5491 }
5462 5492
5463 em->start = start; 5493 em->start = start;
@@ -5467,9 +5497,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5467 em->block_start = ins.objectid; 5497 em->block_start = ins.objectid;
5468 em->block_len = ins.offset; 5498 em->block_len = ins.offset;
5469 em->bdev = root->fs_info->fs_devices->latest_bdev; 5499 em->bdev = root->fs_info->fs_devices->latest_bdev;
5500
5501 /*
5502 * We need to do this because if we're using the original em we searched
5503 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5504 */
5505 em->flags = 0;
5470 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5506 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5471 5507
5472 while (1) { 5508 while (insert) {
5473 write_lock(&em_tree->lock); 5509 write_lock(&em_tree->lock);
5474 ret = add_extent_mapping(em_tree, em); 5510 ret = add_extent_mapping(em_tree, em);
5475 write_unlock(&em_tree->lock); 5511 write_unlock(&em_tree->lock);
@@ -5687,8 +5723,7 @@ must_cow:
5687 * it above 5723 * it above
5688 */ 5724 */
5689 len = bh_result->b_size; 5725 len = bh_result->b_size;
5690 free_extent_map(em); 5726 em = btrfs_new_extent_direct(inode, em, start, len);
5691 em = btrfs_new_extent_direct(inode, start, len);
5692 if (IS_ERR(em)) 5727 if (IS_ERR(em))
5693 return PTR_ERR(em); 5728 return PTR_ERR(em);
5694 len = min(len, em->len - (start - em->start)); 5729 len = min(len, em->len - (start - em->start));
@@ -5851,8 +5886,10 @@ again:
5851 } 5886 }
5852 5887
5853 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5888 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5854 btrfs_ordered_update_i_size(inode, 0, ordered); 5889 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5855 btrfs_update_inode(trans, root, inode); 5890 if (!ret)
5891 btrfs_update_inode(trans, root, inode);
5892 ret = 0;
5856out_unlock: 5893out_unlock:
5857 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5894 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5858 ordered->file_offset + ordered->len - 1, 5895 ordered->file_offset + ordered->len - 1,
@@ -5938,7 +5975,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5938 5975
5939static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5976static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5940 int rw, u64 file_offset, int skip_sum, 5977 int rw, u64 file_offset, int skip_sum,
5941 u32 *csums) 5978 u32 *csums, int async_submit)
5942{ 5979{
5943 int write = rw & REQ_WRITE; 5980 int write = rw & REQ_WRITE;
5944 struct btrfs_root *root = BTRFS_I(inode)->root; 5981 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5949,13 +5986,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5949 if (ret) 5986 if (ret)
5950 goto err; 5987 goto err;
5951 5988
5952 if (write && !skip_sum) { 5989 if (skip_sum)
5990 goto map;
5991
5992 if (write && async_submit) {
5953 ret = btrfs_wq_submit_bio(root->fs_info, 5993 ret = btrfs_wq_submit_bio(root->fs_info,
5954 inode, rw, bio, 0, 0, 5994 inode, rw, bio, 0, 0,
5955 file_offset, 5995 file_offset,
5956 __btrfs_submit_bio_start_direct_io, 5996 __btrfs_submit_bio_start_direct_io,
5957 __btrfs_submit_bio_done); 5997 __btrfs_submit_bio_done);
5958 goto err; 5998 goto err;
5999 } else if (write) {
6000 /*
6001 * If we aren't doing async submit, calculate the csum of the
6002 * bio now.
6003 */
6004 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6005 if (ret)
6006 goto err;
5959 } else if (!skip_sum) { 6007 } else if (!skip_sum) {
5960 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6008 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5961 file_offset, csums); 6009 file_offset, csums);
@@ -5963,7 +6011,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5963 goto err; 6011 goto err;
5964 } 6012 }
5965 6013
5966 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6014map:
6015 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5967err: 6016err:
5968 bio_put(bio); 6017 bio_put(bio);
5969 return ret; 6018 return ret;
@@ -5985,23 +6034,30 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5985 int nr_pages = 0; 6034 int nr_pages = 0;
5986 u32 *csums = dip->csums; 6035 u32 *csums = dip->csums;
5987 int ret = 0; 6036 int ret = 0;
6037 int async_submit = 0;
5988 int write = rw & REQ_WRITE; 6038 int write = rw & REQ_WRITE;
5989 6039
5990 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5991 if (!bio)
5992 return -ENOMEM;
5993 bio->bi_private = dip;
5994 bio->bi_end_io = btrfs_end_dio_bio;
5995 atomic_inc(&dip->pending_bios);
5996
5997 map_length = orig_bio->bi_size; 6040 map_length = orig_bio->bi_size;
5998 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6041 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5999 &map_length, NULL, 0); 6042 &map_length, NULL, 0);
6000 if (ret) { 6043 if (ret) {
6001 bio_put(bio); 6044 bio_put(orig_bio);
6002 return -EIO; 6045 return -EIO;
6003 } 6046 }
6004 6047
6048 if (map_length >= orig_bio->bi_size) {
6049 bio = orig_bio;
6050 goto submit;
6051 }
6052
6053 async_submit = 1;
6054 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6055 if (!bio)
6056 return -ENOMEM;
6057 bio->bi_private = dip;
6058 bio->bi_end_io = btrfs_end_dio_bio;
6059 atomic_inc(&dip->pending_bios);
6060
6005 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6061 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
6006 if (unlikely(map_length < submit_len + bvec->bv_len || 6062 if (unlikely(map_length < submit_len + bvec->bv_len ||
6007 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6063 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6015,7 +6071,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6015 atomic_inc(&dip->pending_bios); 6071 atomic_inc(&dip->pending_bios);
6016 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6072 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6017 file_offset, skip_sum, 6073 file_offset, skip_sum,
6018 csums); 6074 csums, async_submit);
6019 if (ret) { 6075 if (ret) {
6020 bio_put(bio); 6076 bio_put(bio);
6021 atomic_dec(&dip->pending_bios); 6077 atomic_dec(&dip->pending_bios);
@@ -6052,8 +6108,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6052 } 6108 }
6053 } 6109 }
6054 6110
6111submit:
6055 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6112 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6056 csums); 6113 csums, async_submit);
6057 if (!ret) 6114 if (!ret)
6058 return 0; 6115 return 0;
6059 6116
@@ -6148,6 +6205,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6148 unsigned long nr_segs) 6205 unsigned long nr_segs)
6149{ 6206{
6150 int seg; 6207 int seg;
6208 int i;
6151 size_t size; 6209 size_t size;
6152 unsigned long addr; 6210 unsigned long addr;
6153 unsigned blocksize_mask = root->sectorsize - 1; 6211 unsigned blocksize_mask = root->sectorsize - 1;
@@ -6162,8 +6220,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6162 addr = (unsigned long)iov[seg].iov_base; 6220 addr = (unsigned long)iov[seg].iov_base;
6163 size = iov[seg].iov_len; 6221 size = iov[seg].iov_len;
6164 end += size; 6222 end += size;
6165 if ((addr & blocksize_mask) || (size & blocksize_mask)) 6223 if ((addr & blocksize_mask) || (size & blocksize_mask))
6166 goto out; 6224 goto out;
6225
6226 /* If this is a write we don't need to check anymore */
6227 if (rw & WRITE)
6228 continue;
6229
6230 /*
6231 * Check to make sure we don't have duplicate iov_base's in this
6232 * iovec, if so return EINVAL, otherwise we'll get csum errors
6233 * when reading back.
6234 */
6235 for (i = seg + 1; i < nr_segs; i++) {
6236 if (iov[seg].iov_base == iov[i].iov_base)
6237 goto out;
6238 }
6167 } 6239 }
6168 retval = 0; 6240 retval = 0;
6169out: 6241out:
@@ -7206,9 +7278,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7206 dentry->d_name.len, dir->i_ino, objectid, 7278 dentry->d_name.len, dir->i_ino, objectid,
7207 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7279 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
7208 &index); 7280 &index);
7209 err = PTR_ERR(inode); 7281 if (IS_ERR(inode)) {
7210 if (IS_ERR(inode)) 7282 err = PTR_ERR(inode);
7211 goto out_unlock; 7283 goto out_unlock;
7284 }
7212 7285
7213 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); 7286 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
7214 if (err) { 7287 if (err) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfc264fefdb0..ffb48d6c5433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2287 struct btrfs_ioctl_space_info space; 2287 struct btrfs_ioctl_space_info space;
2288 struct btrfs_ioctl_space_info *dest; 2288 struct btrfs_ioctl_space_info *dest;
2289 struct btrfs_ioctl_space_info *dest_orig; 2289 struct btrfs_ioctl_space_info *dest_orig;
2290 struct btrfs_ioctl_space_info *user_dest; 2290 struct btrfs_ioctl_space_info __user *user_dest;
2291 struct btrfs_space_info *info; 2291 struct btrfs_space_info *info;
2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2293 BTRFS_BLOCK_GROUP_SYSTEM, 2293 BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 58e7de9cc90c..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
162 Opt_enospc_debug, Opt_err, 162 Opt_enospc_debug, Opt_subvolrootid, Opt_err,
163}; 163};
164 164
165static match_table_t tokens = { 165static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
189 {Opt_clear_cache, "clear_cache"}, 189 {Opt_clear_cache, "clear_cache"},
190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 {Opt_enospc_debug, "enospc_debug"}, 191 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"},
192 {Opt_err, NULL}, 193 {Opt_err, NULL},
193}; 194};
194 195
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
232 break; 233 break;
233 case Opt_subvol: 234 case Opt_subvol:
234 case Opt_subvolid: 235 case Opt_subvolid:
236 case Opt_subvolrootid:
235 case Opt_device: 237 case Opt_device:
236 /* 238 /*
237 * These are parsed by btrfs_parse_early_options 239 * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
388 */ 390 */
389static int btrfs_parse_early_options(const char *options, fmode_t flags, 391static int btrfs_parse_early_options(const char *options, fmode_t flags,
390 void *holder, char **subvol_name, u64 *subvol_objectid, 392 void *holder, char **subvol_name, u64 *subvol_objectid,
391 struct btrfs_fs_devices **fs_devices) 393 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
392{ 394{
393 substring_t args[MAX_OPT_ARGS]; 395 substring_t args[MAX_OPT_ARGS];
394 char *opts, *orig, *p; 396 char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
429 *subvol_objectid = intarg; 431 *subvol_objectid = intarg;
430 } 432 }
431 break; 433 break;
434 case Opt_subvolrootid:
435 intarg = 0;
436 error = match_int(&args[0], &intarg);
437 if (!error) {
438 /* we want the original fs_tree */
439 if (!intarg)
440 *subvol_rootid =
441 BTRFS_FS_TREE_OBJECTID;
442 else
443 *subvol_rootid = intarg;
444 }
445 break;
432 case Opt_device: 446 case Opt_device:
433 error = btrfs_scan_one_device(match_strdup(&args[0]), 447 error = btrfs_scan_one_device(match_strdup(&args[0]),
434 flags, holder, fs_devices); 448 flags, holder, fs_devices);
@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
736 fmode_t mode = FMODE_READ; 750 fmode_t mode = FMODE_READ;
737 char *subvol_name = NULL; 751 char *subvol_name = NULL;
738 u64 subvol_objectid = 0; 752 u64 subvol_objectid = 0;
753 u64 subvol_rootid = 0;
739 int error = 0; 754 int error = 0;
740 755
741 if (!(flags & MS_RDONLY)) 756 if (!(flags & MS_RDONLY))
@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
743 758
744 error = btrfs_parse_early_options(data, mode, fs_type, 759 error = btrfs_parse_early_options(data, mode, fs_type,
745 &subvol_name, &subvol_objectid, 760 &subvol_name, &subvol_objectid,
746 &fs_devices); 761 &subvol_rootid, &fs_devices);
747 if (error) 762 if (error)
748 return ERR_PTR(error); 763 return ERR_PTR(error);
749 764
@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
807 s->s_flags |= MS_ACTIVE; 822 s->s_flags |= MS_ACTIVE;
808 } 823 }
809 824
810 root = get_default_root(s, subvol_objectid);
811 if (IS_ERR(root)) {
812 error = PTR_ERR(root);
813 deactivate_locked_super(s);
814 goto error_free_subvol_name;
815 }
816 /* if they gave us a subvolume name bind mount into that */ 825 /* if they gave us a subvolume name bind mount into that */
817 if (strcmp(subvol_name, ".")) { 826 if (strcmp(subvol_name, ".")) {
818 struct dentry *new_root; 827 struct dentry *new_root;
828
829 root = get_default_root(s, subvol_rootid);
830 if (IS_ERR(root)) {
831 error = PTR_ERR(root);
832 deactivate_locked_super(s);
833 goto error_free_subvol_name;
834 }
835
819 mutex_lock(&root->d_inode->i_mutex); 836 mutex_lock(&root->d_inode->i_mutex);
820 new_root = lookup_one_len(subvol_name, root, 837 new_root = lookup_one_len(subvol_name, root,
821 strlen(subvol_name)); 838 strlen(subvol_name));
@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
836 } 853 }
837 dput(root); 854 dput(root);
838 root = new_root; 855 root = new_root;
856 } else {
857 root = get_default_root(s, subvol_objectid);
858 if (IS_ERR(root)) {
859 error = PTR_ERR(root);
860 deactivate_locked_super(s);
861 goto error_free_subvol_name;
862 }
839 } 863 }
840 864
841 kfree(subvol_name); 865 kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b158da7e0bb..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
60 if (!cur_trans) 58 if (!cur_trans)
61 return -ENOMEM; 59 return -ENOMEM;
62 root->fs_info->generation++; 60 root->fs_info->generation++;
63 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
64 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
65 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
66 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
67 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
68 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
69 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
70 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
71 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
72 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
73 71
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
88 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
89 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
90 } else { 88 } else {
91 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
92 cur_trans->num_joined++; 90 cur_trans->num_joined++;
93 } 91 }
94 92
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
145 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
146 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
147 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
148 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
149 while (1) { 147 while (1) {
150 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
151 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181{ 179{
182 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
183 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
184 int ret; 183 int ret;
185 184
186 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
204 } 203 }
205 204
206 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
207 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
208 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
209 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
210 209
@@ -224,10 +223,18 @@ again:
224 223
225 if (num_items > 0) { 224 if (num_items > 0) {
226 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
227 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
228 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
229 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
230 } 236 }
237
231 if (ret < 0) { 238 if (ret < 0) {
232 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
233 return ERR_PTR(ret); 240 return ERR_PTR(ret);
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
327 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
328 } 335 }
329 336
330 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
331 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
332 339
333 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
457 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
458 } 465 }
459 466
460 if (lock)
461 mutex_lock(&info->trans_mutex);
462 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
463 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
464 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
465 470
466 smp_mb(); 471 smp_mb();
467 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
468 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
469 put_transaction(cur_trans); 474 put_transaction(cur_trans);
470 if (lock)
471 mutex_unlock(&info->trans_mutex);
472 475
473 if (current->journal_info == trans) 476 if (current->journal_info == trans)
474 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1178 /* take transaction reference */ 1181 /* take transaction reference */
1179 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1180 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1181 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1182 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1183 1186
1184 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1237 1240
1238 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1239 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1240 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1241 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1242 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1243 1246
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1259 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1260 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1261 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1262 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1263 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1264 1267
1265 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1300 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1301 1304
1302 smp_mb(); 1305 smp_mb();
1303 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1304 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1305 else if (should_grow) 1308 else if (should_grow)
1306 schedule_timeout(1); 1309 schedule_timeout(1);
1307 1310
1308 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1309 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1310 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1311 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1312 1315
1313 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1394 1397
1395 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1396 1399
1400 list_del_init(&cur_trans->list);
1397 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1398 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1399 1403
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c50271ad3157..f997ec0c1ba4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2209,8 +2209,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2209 2209
2210 log = root->log_root; 2210 log = root->log_root;
2211 path = btrfs_alloc_path(); 2211 path = btrfs_alloc_path();
2212 if (!path) 2212 if (!path) {
2213 return -ENOMEM; 2213 err = -ENOMEM;
2214 goto out_unlock;
2215 }
2214 2216
2215 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, 2217 di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
2216 name, name_len, -1); 2218 name, name_len, -1);
@@ -2271,6 +2273,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2271 } 2273 }
2272fail: 2274fail:
2273 btrfs_free_path(path); 2275 btrfs_free_path(path);
2276out_unlock:
2274 mutex_unlock(&BTRFS_I(dir)->log_mutex); 2277 mutex_unlock(&BTRFS_I(dir)->log_mutex);
2275 if (ret == -ENOSPC) { 2278 if (ret == -ENOSPC) {
2276 root->fs_info->last_trans_log_full_commit = trans->transid; 2279 root->fs_info->last_trans_log_full_commit = trans->transid;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 309a57b9fc85..c7367ae5a3e6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -155,6 +155,15 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
155 unsigned long limit; 155 unsigned long limit;
156 unsigned long last_waited = 0; 156 unsigned long last_waited = 0;
157 int force_reg = 0; 157 int force_reg = 0;
158 struct blk_plug plug;
159
160 /*
161 * this function runs all the bios we've collected for
162 * a particular device. We don't want to wander off to
163 * another device without first sending all of these down.
164 * So, setup a plug here and finish it off before we return
165 */
166 blk_start_plug(&plug);
158 167
159 bdi = blk_get_backing_dev_info(device->bdev); 168 bdi = blk_get_backing_dev_info(device->bdev);
160 fs_info = device->dev_root->fs_info; 169 fs_info = device->dev_root->fs_info;
@@ -294,6 +303,7 @@ loop_lock:
294 spin_unlock(&device->io_lock); 303 spin_unlock(&device->io_lock);
295 304
296done: 305done:
306 blk_finish_plug(&plug);
297 return 0; 307 return 0;
298} 308}
299 309
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
182 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
183 int ret = 0, slot, advance; 183 int ret = 0, slot;
184 size_t total_size = 0, size_left = size; 184 size_t total_size = 0, size_left = size;
185 unsigned long name_ptr; 185 unsigned long name_ptr;
186 size_t name_len; 186 size_t name_len;
187 u32 nritems;
188 187
189 /* 188 /*
190 * ok we want all objects associated with this id. 189 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
204 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
205 if (ret < 0) 204 if (ret < 0)
206 goto err; 205 goto err;
207 advance = 0; 206
208 while (1) { 207 while (1) {
209 leaf = path->nodes[0]; 208 leaf = path->nodes[0];
210 nritems = btrfs_header_nritems(leaf);
211 slot = path->slots[0]; 209 slot = path->slots[0];
212 210
213 /* this is where we start walking through the path */ 211 /* this is where we start walking through the path */
214 if (advance || slot >= nritems) { 212 if (slot >= btrfs_header_nritems(leaf)) {
215 /* 213 /*
216 * if we've reached the last slot in this leaf we need 214 * if we've reached the last slot in this leaf we need
217 * to go to the next leaf and reset everything 215 * to go to the next leaf and reset everything
218 */ 216 */
219 if (slot >= nritems-1) { 217 ret = btrfs_next_leaf(root, path);
220 ret = btrfs_next_leaf(root, path); 218 if (ret < 0)
221 if (ret) 219 goto err;
222 break; 220 else if (ret > 0)
223 leaf = path->nodes[0]; 221 break;
224 nritems = btrfs_header_nritems(leaf); 222 continue;
225 slot = path->slots[0];
226 } else {
227 /*
228 * just walking through the slots on this leaf
229 */
230 slot++;
231 path->slots[0]++;
232 }
233 } 223 }
234 advance = 1;
235 224
236 btrfs_item_key_to_cpu(leaf, &found_key, slot); 225 btrfs_item_key_to_cpu(leaf, &found_key, slot);
237 226
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
250 239
251 /* we are just looking for how big our buffer needs to be */ 240 /* we are just looking for how big our buffer needs to be */
252 if (!size) 241 if (!size)
253 continue; 242 goto next;
254 243
255 if (!buffer || (name_len + 1) > size_left) { 244 if (!buffer || (name_len + 1) > size_left) {
256 ret = -ERANGE; 245 ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
263 252
264 size_left -= name_len + 1; 253 size_left -= name_len + 1;
265 buffer += name_len + 1; 254 buffer += name_len + 1;
255next:
256 path->slots[0]++;
266 } 257 }
267 ret = total_size; 258 ret = total_size;
268 259
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index db9d55b507d0..4bc862a80efa 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -807,8 +807,7 @@ static int
807cifs_parse_mount_options(char *options, const char *devname, 807cifs_parse_mount_options(char *options, const char *devname,
808 struct smb_vol *vol) 808 struct smb_vol *vol)
809{ 809{
810 char *value; 810 char *value, *data, *end;
811 char *data;
812 unsigned int temp_len, i, j; 811 unsigned int temp_len, i, j;
813 char separator[2]; 812 char separator[2];
814 short int override_uid = -1; 813 short int override_uid = -1;
@@ -851,6 +850,7 @@ cifs_parse_mount_options(char *options, const char *devname,
851 if (!options) 850 if (!options)
852 return 1; 851 return 1;
853 852
853 end = options + strlen(options);
854 if (strncmp(options, "sep=", 4) == 0) { 854 if (strncmp(options, "sep=", 4) == 0) {
855 if (options[4] != 0) { 855 if (options[4] != 0) {
856 separator[0] = options[4]; 856 separator[0] = options[4];
@@ -916,6 +916,7 @@ cifs_parse_mount_options(char *options, const char *devname,
916 the only illegal character in a password is null */ 916 the only illegal character in a password is null */
917 917
918 if ((value[temp_len] == 0) && 918 if ((value[temp_len] == 0) &&
919 (value + temp_len < end) &&
919 (value[temp_len+1] == separator[0])) { 920 (value[temp_len+1] == separator[0])) {
920 /* reinsert comma */ 921 /* reinsert comma */
921 value[temp_len] = separator[0]; 922 value[temp_len] = separator[0];
diff --git a/fs/dcache.c b/fs/dcache.c
index 129a35730994..22a0ef41bad1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -99,12 +99,9 @@ static struct kmem_cache *dentry_cache __read_mostly;
99static unsigned int d_hash_mask __read_mostly; 99static unsigned int d_hash_mask __read_mostly;
100static unsigned int d_hash_shift __read_mostly; 100static unsigned int d_hash_shift __read_mostly;
101 101
102struct dcache_hash_bucket { 102static struct hlist_bl_head *dentry_hashtable __read_mostly;
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106 103
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, 104static inline struct hlist_bl_head *d_hash(struct dentry *parent,
108 unsigned long hash) 105 unsigned long hash)
109{ 106{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; 107 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -112,16 +109,6 @@ static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
112 return dentry_hashtable + (hash & D_HASHMASK); 109 return dentry_hashtable + (hash & D_HASHMASK);
113} 110}
114 111
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
124
125/* Statistics gathering. */ 112/* Statistics gathering. */
126struct dentry_stat_t dentry_stat = { 113struct dentry_stat_t dentry_stat = {
127 .age_limit = 45, 114 .age_limit = 45,
@@ -167,8 +154,8 @@ static void d_free(struct dentry *dentry)
167 if (dentry->d_op && dentry->d_op->d_release) 154 if (dentry->d_op && dentry->d_op->d_release)
168 dentry->d_op->d_release(dentry); 155 dentry->d_op->d_release(dentry);
169 156
170 /* if dentry was never inserted into hash, immediate free is OK */ 157 /* if dentry was never visible to RCU, immediate free is OK */
171 if (hlist_bl_unhashed(&dentry->d_hash)) 158 if (!(dentry->d_flags & DCACHE_RCUACCESS))
172 __d_free(&dentry->d_u.d_rcu); 159 __d_free(&dentry->d_u.d_rcu);
173 else 160 else
174 call_rcu(&dentry->d_u.d_rcu, __d_free); 161 call_rcu(&dentry->d_u.d_rcu, __d_free);
@@ -330,28 +317,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
330 */ 317 */
331void __d_drop(struct dentry *dentry) 318void __d_drop(struct dentry *dentry)
332{ 319{
333 if (!(dentry->d_flags & DCACHE_UNHASHED)) { 320 if (!d_unhashed(dentry)) {
334 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { 321 struct hlist_bl_head *b;
335 bit_spin_lock(0, 322 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
336 (unsigned long *)&dentry->d_sb->s_anon.first); 323 b = &dentry->d_sb->s_anon;
337 dentry->d_flags |= DCACHE_UNHASHED; 324 else
338 hlist_bl_del_init(&dentry->d_hash);
339 __bit_spin_unlock(0,
340 (unsigned long *)&dentry->d_sb->s_anon.first);
341 } else {
342 struct dcache_hash_bucket *b;
343 b = d_hash(dentry->d_parent, dentry->d_name.hash); 325 b = d_hash(dentry->d_parent, dentry->d_name.hash);
344 spin_lock_bucket(b); 326
345 /* 327 hlist_bl_lock(b);
346 * We may not actually need to put DCACHE_UNHASHED 328 __hlist_bl_del(&dentry->d_hash);
347 * manipulations under the hash lock, but follow 329 dentry->d_hash.pprev = NULL;
348 * the principle of least surprise. 330 hlist_bl_unlock(b);
349 */ 331
350 dentry->d_flags |= DCACHE_UNHASHED; 332 dentry_rcuwalk_barrier(dentry);
351 hlist_bl_del_rcu(&dentry->d_hash);
352 spin_unlock_bucket(b);
353 dentry_rcuwalk_barrier(dentry);
354 }
355 } 333 }
356} 334}
357EXPORT_SYMBOL(__d_drop); 335EXPORT_SYMBOL(__d_drop);
@@ -1304,7 +1282,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1304 dname[name->len] = 0; 1282 dname[name->len] = 0;
1305 1283
1306 dentry->d_count = 1; 1284 dentry->d_count = 1;
1307 dentry->d_flags = DCACHE_UNHASHED; 1285 dentry->d_flags = 0;
1308 spin_lock_init(&dentry->d_lock); 1286 spin_lock_init(&dentry->d_lock);
1309 seqcount_init(&dentry->d_seq); 1287 seqcount_init(&dentry->d_seq);
1310 dentry->d_inode = NULL; 1288 dentry->d_inode = NULL;
@@ -1606,10 +1584,9 @@ struct dentry *d_obtain_alias(struct inode *inode)
1606 tmp->d_inode = inode; 1584 tmp->d_inode = inode;
1607 tmp->d_flags |= DCACHE_DISCONNECTED; 1585 tmp->d_flags |= DCACHE_DISCONNECTED;
1608 list_add(&tmp->d_alias, &inode->i_dentry); 1586 list_add(&tmp->d_alias, &inode->i_dentry);
1609 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1587 hlist_bl_lock(&tmp->d_sb->s_anon);
1610 tmp->d_flags &= ~DCACHE_UNHASHED;
1611 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1588 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1612 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1589 hlist_bl_unlock(&tmp->d_sb->s_anon);
1613 spin_unlock(&tmp->d_lock); 1590 spin_unlock(&tmp->d_lock);
1614 spin_unlock(&inode->i_lock); 1591 spin_unlock(&inode->i_lock);
1615 security_d_instantiate(tmp, inode); 1592 security_d_instantiate(tmp, inode);
@@ -1789,7 +1766,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1789 unsigned int len = name->len; 1766 unsigned int len = name->len;
1790 unsigned int hash = name->hash; 1767 unsigned int hash = name->hash;
1791 const unsigned char *str = name->name; 1768 const unsigned char *str = name->name;
1792 struct dcache_hash_bucket *b = d_hash(parent, hash); 1769 struct hlist_bl_head *b = d_hash(parent, hash);
1793 struct hlist_bl_node *node; 1770 struct hlist_bl_node *node;
1794 struct dentry *dentry; 1771 struct dentry *dentry;
1795 1772
@@ -1813,7 +1790,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1813 * 1790 *
1814 * See Documentation/filesystems/path-lookup.txt for more details. 1791 * See Documentation/filesystems/path-lookup.txt for more details.
1815 */ 1792 */
1816 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1793 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1817 struct inode *i; 1794 struct inode *i;
1818 const char *tname; 1795 const char *tname;
1819 int tlen; 1796 int tlen;
@@ -1908,7 +1885,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1908 unsigned int len = name->len; 1885 unsigned int len = name->len;
1909 unsigned int hash = name->hash; 1886 unsigned int hash = name->hash;
1910 const unsigned char *str = name->name; 1887 const unsigned char *str = name->name;
1911 struct dcache_hash_bucket *b = d_hash(parent, hash); 1888 struct hlist_bl_head *b = d_hash(parent, hash);
1912 struct hlist_bl_node *node; 1889 struct hlist_bl_node *node;
1913 struct dentry *found = NULL; 1890 struct dentry *found = NULL;
1914 struct dentry *dentry; 1891 struct dentry *dentry;
@@ -1935,7 +1912,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1935 */ 1912 */
1936 rcu_read_lock(); 1913 rcu_read_lock();
1937 1914
1938 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1915 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1939 const char *tname; 1916 const char *tname;
1940 int tlen; 1917 int tlen;
1941 1918
@@ -2086,13 +2063,13 @@ again:
2086} 2063}
2087EXPORT_SYMBOL(d_delete); 2064EXPORT_SYMBOL(d_delete);
2088 2065
2089static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) 2066static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
2090{ 2067{
2091 BUG_ON(!d_unhashed(entry)); 2068 BUG_ON(!d_unhashed(entry));
2092 spin_lock_bucket(b); 2069 hlist_bl_lock(b);
2093 entry->d_flags &= ~DCACHE_UNHASHED; 2070 entry->d_flags |= DCACHE_RCUACCESS;
2094 hlist_bl_add_head_rcu(&entry->d_hash, &b->head); 2071 hlist_bl_add_head_rcu(&entry->d_hash, b);
2095 spin_unlock_bucket(b); 2072 hlist_bl_unlock(b);
2096} 2073}
2097 2074
2098static void _d_rehash(struct dentry * entry) 2075static void _d_rehash(struct dentry * entry)
@@ -3025,7 +3002,7 @@ static void __init dcache_init_early(void)
3025 3002
3026 dentry_hashtable = 3003 dentry_hashtable =
3027 alloc_large_system_hash("Dentry cache", 3004 alloc_large_system_hash("Dentry cache",
3028 sizeof(struct dcache_hash_bucket), 3005 sizeof(struct hlist_bl_head),
3029 dhash_entries, 3006 dhash_entries,
3030 13, 3007 13,
3031 HASH_EARLY, 3008 HASH_EARLY,
@@ -3034,7 +3011,7 @@ static void __init dcache_init_early(void)
3034 0); 3011 0);
3035 3012
3036 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3013 for (loop = 0; loop < (1 << d_hash_shift); loop++)
3037 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3014 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3038} 3015}
3039 3016
3040static void __init dcache_init(void) 3017static void __init dcache_init(void)
@@ -3057,7 +3034,7 @@ static void __init dcache_init(void)
3057 3034
3058 dentry_hashtable = 3035 dentry_hashtable =
3059 alloc_large_system_hash("Dentry cache", 3036 alloc_large_system_hash("Dentry cache",
3060 sizeof(struct dcache_hash_bucket), 3037 sizeof(struct hlist_bl_head),
3061 dhash_entries, 3038 dhash_entries,
3062 13, 3039 13,
3063 0, 3040 0,
@@ -3066,7 +3043,7 @@ static void __init dcache_init(void)
3066 0); 3043 0);
3067 3044
3068 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3045 for (loop = 0; loop < (1 << d_hash_shift); loop++)
3069 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head); 3046 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3070} 3047}
3071 3048
3072/* SLAB cache for __getname() consumers */ 3049/* SLAB cache for __getname() consumers */
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index d2a70a4561f9..b8d5c8091024 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1452,6 +1452,25 @@ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
1452 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; 1452 crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
1453} 1453}
1454 1454
1455void ecryptfs_i_size_init(const char *page_virt, struct inode *inode)
1456{
1457 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
1458 struct ecryptfs_crypt_stat *crypt_stat;
1459 u64 file_size;
1460
1461 crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
1462 mount_crypt_stat =
1463 &ecryptfs_superblock_to_private(inode->i_sb)->mount_crypt_stat;
1464 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
1465 file_size = i_size_read(ecryptfs_inode_to_lower(inode));
1466 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
1467 file_size += crypt_stat->metadata_size;
1468 } else
1469 file_size = get_unaligned_be64(page_virt);
1470 i_size_write(inode, (loff_t)file_size);
1471 crypt_stat->flags |= ECRYPTFS_I_SIZE_INITIALIZED;
1472}
1473
1455/** 1474/**
1456 * ecryptfs_read_headers_virt 1475 * ecryptfs_read_headers_virt
1457 * @page_virt: The virtual address into which to read the headers 1476 * @page_virt: The virtual address into which to read the headers
@@ -1482,6 +1501,8 @@ static int ecryptfs_read_headers_virt(char *page_virt,
1482 rc = -EINVAL; 1501 rc = -EINVAL;
1483 goto out; 1502 goto out;
1484 } 1503 }
1504 if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
1505 ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
1485 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1506 offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1486 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset), 1507 rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
1487 &bytes_read); 1508 &bytes_read);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index bd3cafd0949d..e70282775e2c 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
269#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800 269#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00000800
270#define ECRYPTFS_ENCFN_USE_FEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_FEK 0x00001000
271#define ECRYPTFS_UNLINK_SIGS 0x00002000 271#define ECRYPTFS_UNLINK_SIGS 0x00002000
272#define ECRYPTFS_I_SIZE_INITIALIZED 0x00004000
272 u32 flags; 273 u32 flags;
273 unsigned int file_version; 274 unsigned int file_version;
274 size_t iv_bytes; 275 size_t iv_bytes;
@@ -295,6 +296,8 @@ struct ecryptfs_crypt_stat {
295struct ecryptfs_inode_info { 296struct ecryptfs_inode_info {
296 struct inode vfs_inode; 297 struct inode vfs_inode;
297 struct inode *wii_inode; 298 struct inode *wii_inode;
299 struct mutex lower_file_mutex;
300 atomic_t lower_file_count;
298 struct file *lower_file; 301 struct file *lower_file;
299 struct ecryptfs_crypt_stat crypt_stat; 302 struct ecryptfs_crypt_stat crypt_stat;
300}; 303};
@@ -626,6 +629,7 @@ struct ecryptfs_open_req {
626int ecryptfs_interpose(struct dentry *hidden_dentry, 629int ecryptfs_interpose(struct dentry *hidden_dentry,
627 struct dentry *this_dentry, struct super_block *sb, 630 struct dentry *this_dentry, struct super_block *sb,
628 u32 flags); 631 u32 flags);
632void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
629int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, 633int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
630 struct dentry *lower_dentry, 634 struct dentry *lower_dentry,
631 struct inode *ecryptfs_dir_inode); 635 struct inode *ecryptfs_dir_inode);
@@ -757,7 +761,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
757 struct dentry *lower_dentry, 761 struct dentry *lower_dentry,
758 struct vfsmount *lower_mnt, 762 struct vfsmount *lower_mnt,
759 const struct cred *cred); 763 const struct cred *cred);
760int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); 764int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry);
765void ecryptfs_put_lower_file(struct inode *inode);
761int 766int
762ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, 767ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
763 size_t *packet_size, 768 size_t *packet_size,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index cedc913d11ba..566e5472f78c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -191,10 +191,10 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 194 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
195 if (rc) { 195 if (rc) {
196 printk(KERN_ERR "%s: Error attempting to initialize " 196 printk(KERN_ERR "%s: Error attempting to initialize "
197 "the persistent file for the dentry with name " 197 "the lower file for the dentry with name "
198 "[%s]; rc = [%d]\n", __func__, 198 "[%s]; rc = [%d]\n", __func__,
199 ecryptfs_dentry->d_name.name, rc); 199 ecryptfs_dentry->d_name.name, rc);
200 goto out_free; 200 goto out_free;
@@ -202,9 +202,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
202 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE) 202 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_ACCMODE)
203 == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) { 203 == O_RDONLY && (file->f_flags & O_ACCMODE) != O_RDONLY) {
204 rc = -EPERM; 204 rc = -EPERM;
205 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " 205 printk(KERN_WARNING "%s: Lower file is RO; eCryptfs "
206 "file must hence be opened RO\n", __func__); 206 "file must hence be opened RO\n", __func__);
207 goto out_free; 207 goto out_put;
208 } 208 }
209 ecryptfs_set_file_lower( 209 ecryptfs_set_file_lower(
210 file, ecryptfs_inode_to_private(inode)->lower_file); 210 file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -232,10 +232,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
232 "Plaintext passthrough mode is not " 232 "Plaintext passthrough mode is not "
233 "enabled; returning -EIO\n"); 233 "enabled; returning -EIO\n");
234 mutex_unlock(&crypt_stat->cs_mutex); 234 mutex_unlock(&crypt_stat->cs_mutex);
235 goto out_free; 235 goto out_put;
236 } 236 }
237 rc = 0; 237 rc = 0;
238 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 238 crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
239 | ECRYPTFS_ENCRYPTED);
239 mutex_unlock(&crypt_stat->cs_mutex); 240 mutex_unlock(&crypt_stat->cs_mutex);
240 goto out; 241 goto out;
241 } 242 }
@@ -245,6 +246,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
245 "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, 246 "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
246 (unsigned long long)i_size_read(inode)); 247 (unsigned long long)i_size_read(inode));
247 goto out; 248 goto out;
249out_put:
250 ecryptfs_put_lower_file(inode);
248out_free: 251out_free:
249 kmem_cache_free(ecryptfs_file_info_cache, 252 kmem_cache_free(ecryptfs_file_info_cache,
250 ecryptfs_file_to_private(file)); 253 ecryptfs_file_to_private(file));
@@ -254,17 +257,13 @@ out:
254 257
255static int ecryptfs_flush(struct file *file, fl_owner_t td) 258static int ecryptfs_flush(struct file *file, fl_owner_t td)
256{ 259{
257 int rc = 0; 260 return file->f_mode & FMODE_WRITE
258 struct file *lower_file = NULL; 261 ? filemap_write_and_wait(file->f_mapping) : 0;
259
260 lower_file = ecryptfs_file_to_lower(file);
261 if (lower_file->f_op && lower_file->f_op->flush)
262 rc = lower_file->f_op->flush(lower_file, td);
263 return rc;
264} 262}
265 263
266static int ecryptfs_release(struct inode *inode, struct file *file) 264static int ecryptfs_release(struct inode *inode, struct file *file)
267{ 265{
266 ecryptfs_put_lower_file(inode);
268 kmem_cache_free(ecryptfs_file_info_cache, 267 kmem_cache_free(ecryptfs_file_info_cache,
269 ecryptfs_file_to_private(file)); 268 ecryptfs_file_to_private(file));
270 return 0; 269 return 0;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index f99051b7adab..4d4cc6a90cd5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -168,19 +168,18 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
168 "context; rc = [%d]\n", rc); 168 "context; rc = [%d]\n", rc);
169 goto out; 169 goto out;
170 } 170 }
171 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 171 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
172 if (rc) { 172 if (rc) {
173 printk(KERN_ERR "%s: Error attempting to initialize " 173 printk(KERN_ERR "%s: Error attempting to initialize "
174 "the persistent file for the dentry with name " 174 "the lower file for the dentry with name "
175 "[%s]; rc = [%d]\n", __func__, 175 "[%s]; rc = [%d]\n", __func__,
176 ecryptfs_dentry->d_name.name, rc); 176 ecryptfs_dentry->d_name.name, rc);
177 goto out; 177 goto out;
178 } 178 }
179 rc = ecryptfs_write_metadata(ecryptfs_dentry); 179 rc = ecryptfs_write_metadata(ecryptfs_dentry);
180 if (rc) { 180 if (rc)
181 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 181 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
182 goto out; 182 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
183 }
184out: 183out:
185 return rc; 184 return rc;
186} 185}
@@ -226,11 +225,9 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
226 struct dentry *lower_dir_dentry; 225 struct dentry *lower_dir_dentry;
227 struct vfsmount *lower_mnt; 226 struct vfsmount *lower_mnt;
228 struct inode *lower_inode; 227 struct inode *lower_inode;
229 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
230 struct ecryptfs_crypt_stat *crypt_stat; 228 struct ecryptfs_crypt_stat *crypt_stat;
231 char *page_virt = NULL; 229 char *page_virt = NULL;
232 u64 file_size; 230 int put_lower = 0, rc = 0;
233 int rc = 0;
234 231
235 lower_dir_dentry = lower_dentry->d_parent; 232 lower_dir_dentry = lower_dentry->d_parent;
236 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( 233 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
@@ -277,14 +274,15 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
277 rc = -ENOMEM; 274 rc = -ENOMEM;
278 goto out; 275 goto out;
279 } 276 }
280 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 277 rc = ecryptfs_get_lower_file(ecryptfs_dentry);
281 if (rc) { 278 if (rc) {
282 printk(KERN_ERR "%s: Error attempting to initialize " 279 printk(KERN_ERR "%s: Error attempting to initialize "
283 "the persistent file for the dentry with name " 280 "the lower file for the dentry with name "
284 "[%s]; rc = [%d]\n", __func__, 281 "[%s]; rc = [%d]\n", __func__,
285 ecryptfs_dentry->d_name.name, rc); 282 ecryptfs_dentry->d_name.name, rc);
286 goto out_free_kmem; 283 goto out_free_kmem;
287 } 284 }
285 put_lower = 1;
288 crypt_stat = &ecryptfs_inode_to_private( 286 crypt_stat = &ecryptfs_inode_to_private(
289 ecryptfs_dentry->d_inode)->crypt_stat; 287 ecryptfs_dentry->d_inode)->crypt_stat;
290 /* TODO: lock for crypt_stat comparison */ 288 /* TODO: lock for crypt_stat comparison */
@@ -302,18 +300,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
302 } 300 }
303 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; 301 crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
304 } 302 }
305 mount_crypt_stat = &ecryptfs_superblock_to_private( 303 ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
306 ecryptfs_dentry->d_sb)->mount_crypt_stat;
307 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
308 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
309 file_size = (crypt_stat->metadata_size
310 + i_size_read(lower_dentry->d_inode));
311 else
312 file_size = i_size_read(lower_dentry->d_inode);
313 } else {
314 file_size = get_unaligned_be64(page_virt);
315 }
316 i_size_write(ecryptfs_dentry->d_inode, (loff_t)file_size);
317out_free_kmem: 304out_free_kmem:
318 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 305 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
319 goto out; 306 goto out;
@@ -322,6 +309,8 @@ out_put:
322 mntput(lower_mnt); 309 mntput(lower_mnt);
323 d_drop(ecryptfs_dentry); 310 d_drop(ecryptfs_dentry);
324out: 311out:
312 if (put_lower)
313 ecryptfs_put_lower_file(ecryptfs_dentry->d_inode);
325 return rc; 314 return rc;
326} 315}
327 316
@@ -538,8 +527,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
538 dget(lower_dentry); 527 dget(lower_dentry);
539 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); 528 rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
540 dput(lower_dentry); 529 dput(lower_dentry);
541 if (!rc)
542 d_delete(lower_dentry);
543 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); 530 fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
544 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink; 531 dir->i_nlink = lower_dir_dentry->d_inode->i_nlink;
545 unlock_dir(lower_dir_dentry); 532 unlock_dir(lower_dir_dentry);
@@ -610,8 +597,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
610 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); 597 fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
611out_lock: 598out_lock:
612 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 599 unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
613 dput(lower_new_dentry->d_parent); 600 dput(lower_new_dir_dentry);
614 dput(lower_old_dentry->d_parent); 601 dput(lower_old_dir_dentry);
615 dput(lower_new_dentry); 602 dput(lower_new_dentry);
616 dput(lower_old_dentry); 603 dput(lower_old_dentry);
617 return rc; 604 return rc;
@@ -759,8 +746,11 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
759 746
760 if (unlikely((ia->ia_size == i_size))) { 747 if (unlikely((ia->ia_size == i_size))) {
761 lower_ia->ia_valid &= ~ATTR_SIZE; 748 lower_ia->ia_valid &= ~ATTR_SIZE;
762 goto out; 749 return 0;
763 } 750 }
751 rc = ecryptfs_get_lower_file(dentry);
752 if (rc)
753 return rc;
764 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 754 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
765 /* Switch on growing or shrinking file */ 755 /* Switch on growing or shrinking file */
766 if (ia->ia_size > i_size) { 756 if (ia->ia_size > i_size) {
@@ -838,6 +828,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
838 lower_ia->ia_valid &= ~ATTR_SIZE; 828 lower_ia->ia_valid &= ~ATTR_SIZE;
839 } 829 }
840out: 830out:
831 ecryptfs_put_lower_file(inode);
841 return rc; 832 return rc;
842} 833}
843 834
@@ -913,7 +904,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
913 904
914 mount_crypt_stat = &ecryptfs_superblock_to_private( 905 mount_crypt_stat = &ecryptfs_superblock_to_private(
915 dentry->d_sb)->mount_crypt_stat; 906 dentry->d_sb)->mount_crypt_stat;
907 rc = ecryptfs_get_lower_file(dentry);
908 if (rc) {
909 mutex_unlock(&crypt_stat->cs_mutex);
910 goto out;
911 }
916 rc = ecryptfs_read_metadata(dentry); 912 rc = ecryptfs_read_metadata(dentry);
913 ecryptfs_put_lower_file(inode);
917 if (rc) { 914 if (rc) {
918 if (!(mount_crypt_stat->flags 915 if (!(mount_crypt_stat->flags
919 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { 916 & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
@@ -927,10 +924,17 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
927 goto out; 924 goto out;
928 } 925 }
929 rc = 0; 926 rc = 0;
930 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 927 crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
928 | ECRYPTFS_ENCRYPTED);
931 } 929 }
932 } 930 }
933 mutex_unlock(&crypt_stat->cs_mutex); 931 mutex_unlock(&crypt_stat->cs_mutex);
932 if (S_ISREG(inode->i_mode)) {
933 rc = filemap_write_and_wait(inode->i_mapping);
934 if (rc)
935 goto out;
936 fsstack_copy_attr_all(inode, lower_inode);
937 }
934 memcpy(&lower_ia, ia, sizeof(lower_ia)); 938 memcpy(&lower_ia, ia, sizeof(lower_ia));
935 if (ia->ia_valid & ATTR_FILE) 939 if (ia->ia_valid & ATTR_FILE)
936 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); 940 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0851ab6980f5..69f994a7d524 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -44,7 +44,7 @@ static struct task_struct *ecryptfs_kthread;
44 * @ignored: ignored 44 * @ignored: ignored
45 * 45 *
46 * The eCryptfs kernel thread that has the responsibility of getting 46 * The eCryptfs kernel thread that has the responsibility of getting
47 * the lower persistent file with RW permissions. 47 * the lower file with RW permissions.
48 * 48 *
49 * Returns zero on success; non-zero otherwise 49 * Returns zero on success; non-zero otherwise
50 */ 50 */
@@ -141,8 +141,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
141 int rc = 0; 141 int rc = 0;
142 142
143 /* Corresponding dput() and mntput() are done when the 143 /* Corresponding dput() and mntput() are done when the
144 * persistent file is fput() when the eCryptfs inode is 144 * lower file is fput() when all eCryptfs files for the inode are
145 * destroyed. */ 145 * released. */
146 dget(lower_dentry); 146 dget(lower_dentry);
147 mntget(lower_mnt); 147 mntget(lower_mnt);
148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; 148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index fdb2eb0ad09e..89b93389af8e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -96,7 +96,7 @@ void __ecryptfs_printk(const char *fmt, ...)
96} 96}
97 97
98/** 98/**
99 * ecryptfs_init_persistent_file 99 * ecryptfs_init_lower_file
100 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with 100 * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
101 * the lower dentry and the lower mount set 101 * the lower dentry and the lower mount set
102 * 102 *
@@ -104,42 +104,70 @@ void __ecryptfs_printk(const char *fmt, ...)
104 * inode. All I/O operations to the lower inode occur through that 104 * inode. All I/O operations to the lower inode occur through that
105 * file. When the first eCryptfs dentry that interposes with the first 105 * file. When the first eCryptfs dentry that interposes with the first
106 * lower dentry for that inode is created, this function creates the 106 * lower dentry for that inode is created, this function creates the
107 * persistent file struct and associates it with the eCryptfs 107 * lower file struct and associates it with the eCryptfs
108 * inode. When the eCryptfs inode is destroyed, the file is closed. 108 * inode. When all eCryptfs files associated with the inode are released, the
109 * file is closed.
109 * 110 *
110 * The persistent file will be opened with read/write permissions, if 111 * The lower file will be opened with read/write permissions, if
111 * possible. Otherwise, it is opened read-only. 112 * possible. Otherwise, it is opened read-only.
112 * 113 *
113 * This function does nothing if a lower persistent file is already 114 * This function does nothing if a lower file is already
114 * associated with the eCryptfs inode. 115 * associated with the eCryptfs inode.
115 * 116 *
116 * Returns zero on success; non-zero otherwise 117 * Returns zero on success; non-zero otherwise
117 */ 118 */
118int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 119static int ecryptfs_init_lower_file(struct dentry *dentry,
120 struct file **lower_file)
119{ 121{
120 const struct cred *cred = current_cred(); 122 const struct cred *cred = current_cred();
121 struct ecryptfs_inode_info *inode_info = 123 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
122 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 124 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
123 int rc = 0; 125 int rc;
124 126
125 if (!inode_info->lower_file) { 127 rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt,
126 struct dentry *lower_dentry; 128 cred);
127 struct vfsmount *lower_mnt = 129 if (rc) {
128 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 130 printk(KERN_ERR "Error opening lower file "
131 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
132 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
133 (*lower_file) = NULL;
134 }
135 return rc;
136}
129 137
130 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 138int ecryptfs_get_lower_file(struct dentry *dentry)
131 rc = ecryptfs_privileged_open(&inode_info->lower_file, 139{
132 lower_dentry, lower_mnt, cred); 140 struct ecryptfs_inode_info *inode_info =
133 if (rc) { 141 ecryptfs_inode_to_private(dentry->d_inode);
134 printk(KERN_ERR "Error opening lower persistent file " 142 int count, rc = 0;
135 "for lower_dentry [0x%p] and lower_mnt [0x%p]; " 143
136 "rc = [%d]\n", lower_dentry, lower_mnt, rc); 144 mutex_lock(&inode_info->lower_file_mutex);
137 inode_info->lower_file = NULL; 145 count = atomic_inc_return(&inode_info->lower_file_count);
138 } 146 if (WARN_ON_ONCE(count < 1))
147 rc = -EINVAL;
148 else if (count == 1) {
149 rc = ecryptfs_init_lower_file(dentry,
150 &inode_info->lower_file);
151 if (rc)
152 atomic_set(&inode_info->lower_file_count, 0);
139 } 153 }
154 mutex_unlock(&inode_info->lower_file_mutex);
140 return rc; 155 return rc;
141} 156}
142 157
158void ecryptfs_put_lower_file(struct inode *inode)
159{
160 struct ecryptfs_inode_info *inode_info;
161
162 inode_info = ecryptfs_inode_to_private(inode);
163 if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
164 &inode_info->lower_file_mutex)) {
165 fput(inode_info->lower_file);
166 inode_info->lower_file = NULL;
167 mutex_unlock(&inode_info->lower_file_mutex);
168 }
169}
170
143static struct inode *ecryptfs_get_inode(struct inode *lower_inode, 171static struct inode *ecryptfs_get_inode(struct inode *lower_inode,
144 struct super_block *sb) 172 struct super_block *sb)
145{ 173{
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index bacc882e1ae4..245b517bf1b6 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -55,6 +55,8 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
55 if (unlikely(!inode_info)) 55 if (unlikely(!inode_info))
56 goto out; 56 goto out;
57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat); 57 ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
58 mutex_init(&inode_info->lower_file_mutex);
59 atomic_set(&inode_info->lower_file_count, 0);
58 inode_info->lower_file = NULL; 60 inode_info->lower_file = NULL;
59 inode = &inode_info->vfs_inode; 61 inode = &inode_info->vfs_inode;
60out: 62out:
@@ -77,8 +79,7 @@ static void ecryptfs_i_callback(struct rcu_head *head)
77 * 79 *
78 * This is used during the final destruction of the inode. All 80 * This is used during the final destruction of the inode. All
79 * allocation of memory related to the inode, including allocated 81 * allocation of memory related to the inode, including allocated
80 * memory in the crypt_stat struct, will be released here. This 82 * memory in the crypt_stat struct, will be released here.
81 * function also fput()'s the persistent file for the lower inode.
82 * There should be no chance that this deallocation will be missed. 83 * There should be no chance that this deallocation will be missed.
83 */ 84 */
84static void ecryptfs_destroy_inode(struct inode *inode) 85static void ecryptfs_destroy_inode(struct inode *inode)
@@ -86,16 +87,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
86 struct ecryptfs_inode_info *inode_info; 87 struct ecryptfs_inode_info *inode_info;
87 88
88 inode_info = ecryptfs_inode_to_private(inode); 89 inode_info = ecryptfs_inode_to_private(inode);
89 if (inode_info->lower_file) { 90 BUG_ON(inode_info->lower_file);
90 struct dentry *lower_dentry =
91 inode_info->lower_file->f_dentry;
92
93 BUG_ON(!lower_dentry);
94 if (lower_dentry->d_inode) {
95 fput(inode_info->lower_file);
96 inode_info->lower_file = NULL;
97 }
98 }
99 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
100 call_rcu(&inode->i_rcu, ecryptfs_i_callback); 92 call_rcu(&inode->i_rcu, ecryptfs_i_callback);
101} 93}
diff --git a/fs/file.c b/fs/file.c
index 0be344755c02..4c6992d8f3ba 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <linux/mmzone.h>
12#include <linux/time.h> 13#include <linux/time.h>
13#include <linux/sched.h> 14#include <linux/sched.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
@@ -39,14 +40,17 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */
39 */ 40 */
40static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); 41static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
41 42
42static inline void *alloc_fdmem(unsigned int size) 43static void *alloc_fdmem(unsigned int size)
43{ 44{
44 void *data; 45 /*
45 46 * Very large allocations can stress page reclaim, so fall back to
46 data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); 47 * vmalloc() if the allocation size will be considered "large" by the VM.
47 if (data != NULL) 48 */
48 return data; 49 if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
49 50 void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
51 if (data != NULL)
52 return data;
53 }
50 return vmalloc(size); 54 return vmalloc(size);
51} 55}
52 56
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index c71995b111bf..0f5c4f9d5d62 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -884,8 +884,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
884 } 884 }
885 885
886 brelse(dibh); 886 brelse(dibh);
887 gfs2_trans_end(sdp);
888failed: 887failed:
888 gfs2_trans_end(sdp);
889 if (al) { 889 if (al) {
890 gfs2_inplace_release(ip); 890 gfs2_inplace_release(ip);
891 gfs2_quota_unlock(ip); 891 gfs2_quota_unlock(ip);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5c356d09c321..f789c5732b7c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1506 inode = gfs2_inode_lookup(dir->i_sb, 1506 inode = gfs2_inode_lookup(dir->i_sb,
1507 be16_to_cpu(dent->de_type), 1507 be16_to_cpu(dent->de_type),
1508 be64_to_cpu(dent->de_inum.no_addr), 1508 be64_to_cpu(dent->de_inum.no_addr),
1509 be64_to_cpu(dent->de_inum.no_formal_ino)); 1509 be64_to_cpu(dent->de_inum.no_formal_ino), 0);
1510 brelse(bh); 1510 brelse(bh);
1511 return inode; 1511 return inode;
1512 } 1512 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index b2682e073eee..e48310885c48 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -617,18 +617,51 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
617 return generic_file_aio_write(iocb, iov, nr_segs, pos); 617 return generic_file_aio_write(iocb, iov, nr_segs, pos);
618} 618}
619 619
620static void empty_write_end(struct page *page, unsigned from, 620static int empty_write_end(struct page *page, unsigned from,
621 unsigned to) 621 unsigned to, int mode)
622{ 622{
623 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 623 struct inode *inode = page->mapping->host;
624 struct gfs2_inode *ip = GFS2_I(inode);
625 struct buffer_head *bh;
626 unsigned offset, blksize = 1 << inode->i_blkbits;
627 pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
624 628
625 zero_user(page, from, to-from); 629 zero_user(page, from, to-from);
626 mark_page_accessed(page); 630 mark_page_accessed(page);
627 631
628 if (!gfs2_is_writeback(ip)) 632 if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
629 gfs2_page_add_databufs(ip, page, from, to); 633 if (!gfs2_is_writeback(ip))
634 gfs2_page_add_databufs(ip, page, from, to);
635
636 block_commit_write(page, from, to);
637 return 0;
638 }
639
640 offset = 0;
641 bh = page_buffers(page);
642 while (offset < to) {
643 if (offset >= from) {
644 set_buffer_uptodate(bh);
645 mark_buffer_dirty(bh);
646 clear_buffer_new(bh);
647 write_dirty_buffer(bh, WRITE);
648 }
649 offset += blksize;
650 bh = bh->b_this_page;
651 }
630 652
631 block_commit_write(page, from, to); 653 offset = 0;
654 bh = page_buffers(page);
655 while (offset < to) {
656 if (offset >= from) {
657 wait_on_buffer(bh);
658 if (!buffer_uptodate(bh))
659 return -EIO;
660 }
661 offset += blksize;
662 bh = bh->b_this_page;
663 }
664 return 0;
632} 665}
633 666
634static int needs_empty_write(sector_t block, struct inode *inode) 667static int needs_empty_write(sector_t block, struct inode *inode)
@@ -643,7 +676,8 @@ static int needs_empty_write(sector_t block, struct inode *inode)
643 return !buffer_mapped(&bh_map); 676 return !buffer_mapped(&bh_map);
644} 677}
645 678
646static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 679static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
680 int mode)
647{ 681{
648 struct inode *inode = page->mapping->host; 682 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize; 683 unsigned start, end, next, blksize;
@@ -668,7 +702,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
668 gfs2_block_map); 702 gfs2_block_map);
669 if (unlikely(ret)) 703 if (unlikely(ret))
670 return ret; 704 return ret;
671 empty_write_end(page, start, end); 705 ret = empty_write_end(page, start, end, mode);
706 if (unlikely(ret))
707 return ret;
672 end = 0; 708 end = 0;
673 } 709 }
674 start = next; 710 start = next;
@@ -682,7 +718,9 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
682 ret = __block_write_begin(page, start, end - start, gfs2_block_map); 718 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
683 if (unlikely(ret)) 719 if (unlikely(ret))
684 return ret; 720 return ret;
685 empty_write_end(page, start, end); 721 ret = empty_write_end(page, start, end, mode);
722 if (unlikely(ret))
723 return ret;
686 } 724 }
687 725
688 return 0; 726 return 0;
@@ -731,7 +769,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
731 769
732 if (curr == end) 770 if (curr == end)
733 to = end_offset; 771 to = end_offset;
734 error = write_empty_blocks(page, from, to); 772 error = write_empty_blocks(page, from, to, mode);
735 if (!error && offset + to > inode->i_size && 773 if (!error && offset + to > inode->i_size &&
736 !(mode & FALLOC_FL_KEEP_SIZE)) { 774 !(mode & FALLOC_FL_KEEP_SIZE)) {
737 i_size_write(inode, offset + to); 775 i_size_write(inode, offset + to);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f07643e21bfa..7a4fb630a320 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
93 93
94static inline void spin_lock_bucket(unsigned int hash) 94static inline void spin_lock_bucket(unsigned int hash)
95{ 95{
96 struct hlist_bl_head *bl = &gl_hash_table[hash]; 96 hlist_bl_lock(&gl_hash_table[hash]);
97 bit_spin_lock(0, (unsigned long *)bl);
98} 97}
99 98
100static inline void spin_unlock_bucket(unsigned int hash) 99static inline void spin_unlock_bucket(unsigned int hash)
101{ 100{
102 struct hlist_bl_head *bl = &gl_hash_table[hash]; 101 hlist_bl_unlock(&gl_hash_table[hash]);
103 __bit_spin_unlock(0, (unsigned long *)bl);
104} 102}
105 103
106static void gfs2_glock_dealloc(struct rcu_head *rcu) 104static void gfs2_glock_dealloc(struct rcu_head *rcu)
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 3754e3cbf02b..25eeb2bcee47 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -385,6 +385,10 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
385static void iopen_go_callback(struct gfs2_glock *gl) 385static void iopen_go_callback(struct gfs2_glock *gl)
386{ 386{
387 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; 387 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
388 struct gfs2_sbd *sdp = gl->gl_sbd;
389
390 if (sdp->sd_vfs->s_flags & MS_RDONLY)
391 return;
388 392
389 if (gl->gl_demote_state == LM_ST_UNLOCKED && 393 if (gl->gl_demote_state == LM_ST_UNLOCKED &&
390 gl->gl_state == LM_ST_SHARED && ip) { 394 gl->gl_state == LM_ST_SHARED && ip) {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 97d54a28776a..9134dcb89479 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -40,37 +40,61 @@ struct gfs2_inum_range_host {
40 u64 ir_length; 40 u64 ir_length;
41}; 41};
42 42
43struct gfs2_skip_data {
44 u64 no_addr;
45 int skipped;
46 int non_block;
47};
48
43static int iget_test(struct inode *inode, void *opaque) 49static int iget_test(struct inode *inode, void *opaque)
44{ 50{
45 struct gfs2_inode *ip = GFS2_I(inode); 51 struct gfs2_inode *ip = GFS2_I(inode);
46 u64 *no_addr = opaque; 52 struct gfs2_skip_data *data = opaque;
47 53
48 if (ip->i_no_addr == *no_addr) 54 if (ip->i_no_addr == data->no_addr) {
55 if (data->non_block &&
56 inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
57 data->skipped = 1;
58 return 0;
59 }
49 return 1; 60 return 1;
50 61 }
51 return 0; 62 return 0;
52} 63}
53 64
54static int iget_set(struct inode *inode, void *opaque) 65static int iget_set(struct inode *inode, void *opaque)
55{ 66{
56 struct gfs2_inode *ip = GFS2_I(inode); 67 struct gfs2_inode *ip = GFS2_I(inode);
57 u64 *no_addr = opaque; 68 struct gfs2_skip_data *data = opaque;
58 69
59 inode->i_ino = (unsigned long)*no_addr; 70 if (data->skipped)
60 ip->i_no_addr = *no_addr; 71 return -ENOENT;
72 inode->i_ino = (unsigned long)(data->no_addr);
73 ip->i_no_addr = data->no_addr;
61 return 0; 74 return 0;
62} 75}
63 76
64struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 77struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
65{ 78{
66 unsigned long hash = (unsigned long)no_addr; 79 unsigned long hash = (unsigned long)no_addr;
67 return ilookup5(sb, hash, iget_test, &no_addr); 80 struct gfs2_skip_data data;
81
82 data.no_addr = no_addr;
83 data.skipped = 0;
84 data.non_block = 0;
85 return ilookup5(sb, hash, iget_test, &data);
68} 86}
69 87
70static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) 88static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
89 int non_block)
71{ 90{
91 struct gfs2_skip_data data;
72 unsigned long hash = (unsigned long)no_addr; 92 unsigned long hash = (unsigned long)no_addr;
73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 93
94 data.no_addr = no_addr;
95 data.skipped = 0;
96 data.non_block = non_block;
97 return iget5_locked(sb, hash, iget_test, iget_set, &data);
74} 98}
75 99
76/** 100/**
@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode)
111 * @sb: The super block 135 * @sb: The super block
112 * @no_addr: The inode number 136 * @no_addr: The inode number
113 * @type: The type of the inode 137 * @type: The type of the inode
138 * @non_block: Can we block on inodes that are being freed?
114 * 139 *
115 * Returns: A VFS inode, or an error 140 * Returns: A VFS inode, or an error
116 */ 141 */
117 142
118struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, 143struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
119 u64 no_addr, u64 no_formal_ino) 144 u64 no_addr, u64 no_formal_ino, int non_block)
120{ 145{
121 struct inode *inode; 146 struct inode *inode;
122 struct gfs2_inode *ip; 147 struct gfs2_inode *ip;
123 struct gfs2_glock *io_gl = NULL; 148 struct gfs2_glock *io_gl = NULL;
124 int error; 149 int error;
125 150
126 inode = gfs2_iget(sb, no_addr); 151 inode = gfs2_iget(sb, no_addr, non_block);
127 ip = GFS2_I(inode); 152 ip = GFS2_I(inode);
128 153
129 if (!inode) 154 if (!inode)
@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
185{ 210{
186 struct super_block *sb = sdp->sd_vfs; 211 struct super_block *sb = sdp->sd_vfs;
187 struct gfs2_holder i_gh; 212 struct gfs2_holder i_gh;
188 struct inode *inode; 213 struct inode *inode = NULL;
189 int error; 214 int error;
190 215
216 /* Must not read in block until block type is verified */
191 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, 217 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
192 LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 218 LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
193 if (error) 219 if (error)
194 return ERR_PTR(error); 220 return ERR_PTR(error);
195 221
@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
197 if (error) 223 if (error)
198 goto fail; 224 goto fail;
199 225
200 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); 226 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
201 if (IS_ERR(inode)) 227 if (IS_ERR(inode))
202 goto fail; 228 goto fail;
203 229
@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
843 goto fail_gunlock2; 869 goto fail_gunlock2;
844 870
845 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 871 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
846 inum.no_formal_ino); 872 inum.no_formal_ino, 0);
847 if (IS_ERR(inode)) 873 if (IS_ERR(inode))
848 goto fail_gunlock2; 874 goto fail_gunlock2;
849 875
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3e00a66e7cbd..099ca305e518 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -97,7 +97,8 @@ err:
97} 97}
98 98
99extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 99extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
100 u64 no_addr, u64 no_formal_ino); 100 u64 no_addr, u64 no_formal_ino,
101 int non_block);
101extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
102 u64 *no_formal_ino, 103 u64 *no_formal_ino,
103 unsigned int blktype); 104 unsigned int blktype);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 42ef24355afb..d3c69eb91c74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
430 struct dentry *dentry; 430 struct dentry *dentry;
431 struct inode *inode; 431 struct inode *inode;
432 432
433 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); 433 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
434 if (IS_ERR(inode)) { 434 if (IS_ERR(inode)) {
435 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 435 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
436 return PTR_ERR(inode); 436 return PTR_ERR(inode);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index cf930cd9664a..6fcae8469f6d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
945 /* rgblk_search can return a block < goal, so we need to 945 /* rgblk_search can return a block < goal, so we need to
946 keep it marching forward. */ 946 keep it marching forward. */
947 no_addr = block + rgd->rd_data0; 947 no_addr = block + rgd->rd_data0;
948 goal++; 948 goal = max(block + 1, goal + 1);
949 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 949 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
950 continue; 950 continue;
951 if (no_addr == skip) 951 if (no_addr == skip)
@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
971 found++; 971 found++;
972 972
973 /* Limit reclaim to sensible number of tasks */ 973 /* Limit reclaim to sensible number of tasks */
974 if (found > 2*NR_CPUS) 974 if (found > NR_CPUS)
975 return; 975 return;
976 } 976 }
977 977
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a4e23d68a398..b9f28e66dad1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1318,15 +1318,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1318 1318
1319static void gfs2_evict_inode(struct inode *inode) 1319static void gfs2_evict_inode(struct inode *inode)
1320{ 1320{
1321 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 1321 struct super_block *sb = inode->i_sb;
1322 struct gfs2_sbd *sdp = sb->s_fs_info;
1322 struct gfs2_inode *ip = GFS2_I(inode); 1323 struct gfs2_inode *ip = GFS2_I(inode);
1323 struct gfs2_holder gh; 1324 struct gfs2_holder gh;
1324 int error; 1325 int error;
1325 1326
1326 if (inode->i_nlink) 1327 if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
1327 goto out; 1328 goto out;
1328 1329
1329 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1330 /* Must not read inode block until block type has been verified */
1331 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
1330 if (unlikely(error)) { 1332 if (unlikely(error)) {
1331 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1333 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1332 goto out; 1334 goto out;
@@ -1336,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode)
1336 if (error) 1338 if (error)
1337 goto out_truncate; 1339 goto out_truncate;
1338 1340
1341 if (test_bit(GIF_INVALID, &ip->i_flags)) {
1342 error = gfs2_inode_refresh(ip);
1343 if (error)
1344 goto out_truncate;
1345 }
1346
1339 ip->i_iopen_gh.gh_flags |= GL_NOCACHE; 1347 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1340 gfs2_glock_dq_wait(&ip->i_iopen_gh); 1348 gfs2_glock_dq_wait(&ip->i_iopen_gh);
1341 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 1349 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 33435e4b14d2..ce03a182c771 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -480,10 +480,6 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
480 !read_only) 480 !read_only)
481 return -EIO; 481 return -EIO;
482 482
483 mutex_init(&super->s_dirop_mutex);
484 mutex_init(&super->s_object_alias_mutex);
485 INIT_LIST_HEAD(&super->s_freeing_list);
486
487 ret = logfs_init_rw(sb); 483 ret = logfs_init_rw(sb);
488 if (ret) 484 if (ret)
489 return ret; 485 return ret;
@@ -601,6 +597,10 @@ static struct dentry *logfs_mount(struct file_system_type *type, int flags,
601 if (!super) 597 if (!super)
602 return ERR_PTR(-ENOMEM); 598 return ERR_PTR(-ENOMEM);
603 599
600 mutex_init(&super->s_dirop_mutex);
601 mutex_init(&super->s_object_alias_mutex);
602 INIT_LIST_HEAD(&super->s_freeing_list);
603
604 if (!devname) 604 if (!devname)
605 err = logfs_get_sb_bdev(super, type, devname); 605 err = logfs_get_sb_bdev(super, type, devname);
606 else if (strncmp(devname, "mtd", 3)) 606 else if (strncmp(devname, "mtd", 3))
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 89fc160fd5b0..1f063bacd285 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
119} 119}
120 120
121#ifdef CONFIG_NFS_V4 121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode) 122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{ 123{
124 struct gss_api_mech *mech; 124 struct gss_api_mech *mech;
125 struct xdr_netobj oid; 125 struct xdr_netobj oid;
@@ -166,7 +166,7 @@ static int nfs_negotiate_security(const struct dentry *parent,
166 } 166 }
167 flavors = page_address(page); 167 flavors = page_address(page);
168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors); 168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
169 *flavor = nfs_find_best_sec(flavors, dentry->d_inode); 169 *flavor = nfs_find_best_sec(flavors);
170 put_page(page); 170 put_page(page);
171 } 171 }
172 172
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1c261ddd65d..c4a69833dd0d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -47,6 +47,7 @@ enum nfs4_client_state {
47 NFS4CLNT_LAYOUTRECALL, 47 NFS4CLNT_LAYOUTRECALL,
48 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
49 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
50 NFS4CLNT_LEASE_CONFIRM,
50}; 51};
51 52
52enum nfs4_session_state { 53enum nfs4_session_state {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9bf41eab3e46..69c0f3c5ee7a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -46,6 +46,7 @@
46#include <linux/nfs4.h> 46#include <linux/nfs4.h>
47#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
48#include <linux/nfs_page.h> 48#include <linux/nfs_page.h>
49#include <linux/nfs_mount.h>
49#include <linux/namei.h> 50#include <linux/namei.h>
50#include <linux/mount.h> 51#include <linux/mount.h>
51#include <linux/module.h> 52#include <linux/module.h>
@@ -443,8 +444,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
443 if (res->sr_status == 1) 444 if (res->sr_status == 1)
444 res->sr_status = NFS_OK; 445 res->sr_status = NFS_OK;
445 446
446 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 447 /* don't increment the sequence number if the task wasn't sent */
447 if (!res->sr_slot) 448 if (!RPC_WAS_SENT(task))
448 goto out; 449 goto out;
449 450
450 /* Check the SEQUENCE operation status */ 451 /* Check the SEQUENCE operation status */
@@ -2185,9 +2186,14 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2185 struct nfs4_exception exception = { }; 2186 struct nfs4_exception exception = { };
2186 int err; 2187 int err;
2187 do { 2188 do {
2188 err = nfs4_handle_exception(server, 2189 err = _nfs4_lookup_root(server, fhandle, info);
2189 _nfs4_lookup_root(server, fhandle, info), 2190 switch (err) {
2190 &exception); 2191 case 0:
2192 case -NFS4ERR_WRONGSEC:
2193 break;
2194 default:
2195 err = nfs4_handle_exception(server, err, &exception);
2196 }
2191 } while (exception.retry); 2197 } while (exception.retry);
2192 return err; 2198 return err;
2193} 2199}
@@ -2208,25 +2214,47 @@ out:
2208 return ret; 2214 return ret;
2209} 2215}
2210 2216
2211/* 2217static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2212 * get the file handle for the "/" directory on the server
2213 */
2214static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2215 struct nfs_fsinfo *info) 2218 struct nfs_fsinfo *info)
2216{ 2219{
2217 int i, len, status = 0; 2220 int i, len, status = 0;
2218 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2]; 2221 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
2219 2222
2220 flav_array[0] = RPC_AUTH_UNIX; 2223 len = gss_mech_list_pseudoflavors(&flav_array[0]);
2221 len = gss_mech_list_pseudoflavors(&flav_array[1]); 2224 flav_array[len] = RPC_AUTH_NULL;
2222 flav_array[1+len] = RPC_AUTH_NULL; 2225 len += 1;
2223 len += 2;
2224 2226
2225 for (i = 0; i < len; i++) { 2227 for (i = 0; i < len; i++) {
2226 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); 2228 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2227 if (status != -EPERM) 2229 if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
2228 break; 2230 continue;
2231 break;
2229 } 2232 }
2233 /*
2234 * -EACCES could mean that the user doesn't have correct permissions
2235 * to access the mount. It could also mean that we tried to mount
2236 * with a gss auth flavor, but rpc.gssd isn't running. Either way,
2237 * existing mount programs don't handle -EACCES very well so it should
2238 * be mapped to -EPERM instead.
2239 */
2240 if (status == -EACCES)
2241 status = -EPERM;
2242 return status;
2243}
2244
2245/*
2246 * get the file handle for the "/" directory on the server
2247 */
2248static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2249 struct nfs_fsinfo *info)
2250{
2251 int status = nfs4_lookup_root(server, fhandle, info);
2252 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
2253 /*
2254 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
2255 * by nfs4_map_errors() as this function exits.
2256 */
2257 status = nfs4_find_root_sec(server, fhandle, info);
2230 if (status == 0) 2258 if (status == 0)
2231 status = nfs4_server_capabilities(server, fhandle); 2259 status = nfs4_server_capabilities(server, fhandle);
2232 if (status == 0) 2260 if (status == 0)
@@ -3723,21 +3751,20 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3723 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 3751 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
3724 clp->cl_ipaddr, port >> 8, port & 255); 3752 clp->cl_ipaddr, port >> 8, port & 255);
3725 3753
3726 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3754 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3727 if (status != -NFS4ERR_CLID_INUSE) 3755 if (status != -NFS4ERR_CLID_INUSE)
3728 break; 3756 break;
3729 if (signalled()) 3757 if (loop != 0) {
3758 ++clp->cl_id_uniquifier;
3730 break; 3759 break;
3731 if (loop++ & 1) 3760 }
3732 ssleep(clp->cl_lease_time / HZ + 1); 3761 ++loop;
3733 else 3762 ssleep(clp->cl_lease_time / HZ + 1);
3734 if (++clp->cl_id_uniquifier == 0)
3735 break;
3736 } 3763 }
3737 return status; 3764 return status;
3738} 3765}
3739 3766
3740static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, 3767int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3741 struct nfs4_setclientid_res *arg, 3768 struct nfs4_setclientid_res *arg,
3742 struct rpc_cred *cred) 3769 struct rpc_cred *cred)
3743{ 3770{
@@ -3752,7 +3779,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3752 int status; 3779 int status;
3753 3780
3754 now = jiffies; 3781 now = jiffies;
3755 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3782 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3756 if (status == 0) { 3783 if (status == 0) {
3757 spin_lock(&clp->cl_lock); 3784 spin_lock(&clp->cl_lock);
3758 clp->cl_lease_time = fsinfo.lease_time * HZ; 3785 clp->cl_lease_time = fsinfo.lease_time * HZ;
@@ -3762,26 +3789,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3762 return status; 3789 return status;
3763} 3790}
3764 3791
3765int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3766 struct nfs4_setclientid_res *arg,
3767 struct rpc_cred *cred)
3768{
3769 long timeout = 0;
3770 int err;
3771 do {
3772 err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
3773 switch (err) {
3774 case 0:
3775 return err;
3776 case -NFS4ERR_RESOURCE:
3777 /* The IBM lawyers misread another document! */
3778 case -NFS4ERR_DELAY:
3779 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3780 }
3781 } while (err == 0);
3782 return err;
3783}
3784
3785struct nfs4_delegreturndata { 3792struct nfs4_delegreturndata {
3786 struct nfs4_delegreturnargs args; 3793 struct nfs4_delegreturnargs args;
3787 struct nfs4_delegreturnres res; 3794 struct nfs4_delegreturnres res;
@@ -4786,7 +4793,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4786 init_utsname()->domainname, 4793 init_utsname()->domainname,
4787 clp->cl_rpcclient->cl_auth->au_flavor); 4794 clp->cl_rpcclient->cl_auth->au_flavor);
4788 4795
4789 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 4796 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4790 if (!status) 4797 if (!status)
4791 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4798 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4792 dprintk("<-- %s status= %d\n", __func__, status); 4799 dprintk("<-- %s status= %d\n", __func__, status);
@@ -4869,7 +4876,8 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4869 .rpc_client = clp->cl_rpcclient, 4876 .rpc_client = clp->cl_rpcclient,
4870 .rpc_message = &msg, 4877 .rpc_message = &msg,
4871 .callback_ops = &nfs4_get_lease_time_ops, 4878 .callback_ops = &nfs4_get_lease_time_ops,
4872 .callback_data = &data 4879 .callback_data = &data,
4880 .flags = RPC_TASK_TIMEOUT,
4873 }; 4881 };
4874 int status; 4882 int status;
4875 4883
@@ -5171,7 +5179,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
5171 nfs4_init_channel_attrs(&args); 5179 nfs4_init_channel_attrs(&args);
5172 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); 5180 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
5173 5181
5174 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); 5182 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5175 5183
5176 if (!status) 5184 if (!status)
5177 /* Verify the session's negotiated channel_attrs values */ 5185 /* Verify the session's negotiated channel_attrs values */
@@ -5194,20 +5202,10 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5194 int status; 5202 int status;
5195 unsigned *ptr; 5203 unsigned *ptr;
5196 struct nfs4_session *session = clp->cl_session; 5204 struct nfs4_session *session = clp->cl_session;
5197 long timeout = 0;
5198 int err;
5199 5205
5200 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5206 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5201 5207
5202 do { 5208 status = _nfs4_proc_create_session(clp);
5203 status = _nfs4_proc_create_session(clp);
5204 if (status == -NFS4ERR_DELAY) {
5205 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5206 if (err)
5207 status = err;
5208 }
5209 } while (status == -NFS4ERR_DELAY);
5210
5211 if (status) 5209 if (status)
5212 goto out; 5210 goto out;
5213 5211
@@ -5248,7 +5246,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5248 msg.rpc_argp = session; 5246 msg.rpc_argp = session;
5249 msg.rpc_resp = NULL; 5247 msg.rpc_resp = NULL;
5250 msg.rpc_cred = NULL; 5248 msg.rpc_cred = NULL;
5251 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); 5249 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5252 5250
5253 if (status) 5251 if (status)
5254 printk(KERN_WARNING 5252 printk(KERN_WARNING
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a6804f704d9d..036f5adc9e1f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list);
64 64
65int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 65int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
66{ 66{
67 struct nfs4_setclientid_res clid; 67 struct nfs4_setclientid_res clid = {
68 .clientid = clp->cl_clientid,
69 .confirm = clp->cl_confirm,
70 };
68 unsigned short port; 71 unsigned short port;
69 int status; 72 int status;
70 73
74 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
75 goto do_confirm;
71 port = nfs_callback_tcpport; 76 port = nfs_callback_tcpport;
72 if (clp->cl_addr.ss_family == AF_INET6) 77 if (clp->cl_addr.ss_family == AF_INET6)
73 port = nfs_callback_tcpport6; 78 port = nfs_callback_tcpport6;
@@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
75 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); 80 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
76 if (status != 0) 81 if (status != 0)
77 goto out; 82 goto out;
83 clp->cl_clientid = clid.clientid;
84 clp->cl_confirm = clid.confirm;
85 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
86do_confirm:
78 status = nfs4_proc_setclientid_confirm(clp, &clid, cred); 87 status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
79 if (status != 0) 88 if (status != 0)
80 goto out; 89 goto out;
81 clp->cl_clientid = clid.clientid; 90 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
82 nfs4_schedule_state_renewal(clp); 91 nfs4_schedule_state_renewal(clp);
83out: 92out:
84 return status; 93 return status;
@@ -230,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
230{ 239{
231 int status; 240 int status;
232 241
242 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
243 goto do_confirm;
233 nfs4_begin_drain_session(clp); 244 nfs4_begin_drain_session(clp);
234 status = nfs4_proc_exchange_id(clp, cred); 245 status = nfs4_proc_exchange_id(clp, cred);
235 if (status != 0) 246 if (status != 0)
236 goto out; 247 goto out;
248 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
249do_confirm:
237 status = nfs4_proc_create_session(clp); 250 status = nfs4_proc_create_session(clp);
238 if (status != 0) 251 if (status != 0)
239 goto out; 252 goto out;
253 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
240 nfs41_setup_state_renewal(clp); 254 nfs41_setup_state_renewal(clp);
241 nfs_mark_client_ready(clp, NFS_CS_READY); 255 nfs_mark_client_ready(clp, NFS_CS_READY);
242out: 256out:
@@ -1584,20 +1598,23 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1584 */ 1598 */
1585static void nfs4_set_lease_expired(struct nfs_client *clp, int status) 1599static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1586{ 1600{
1587 if (nfs4_has_session(clp)) { 1601 switch (status) {
1588 switch (status) { 1602 case -NFS4ERR_CLID_INUSE:
1589 case -NFS4ERR_DELAY: 1603 case -NFS4ERR_STALE_CLIENTID:
1590 case -NFS4ERR_CLID_INUSE: 1604 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1591 case -EAGAIN: 1605 break;
1592 break; 1606 case -NFS4ERR_DELAY:
1607 case -ETIMEDOUT:
1608 case -EAGAIN:
1609 ssleep(1);
1610 break;
1593 1611
1594 case -EKEYEXPIRED: 1612 case -EKEYEXPIRED:
1595 nfs4_warn_keyexpired(clp->cl_hostname); 1613 nfs4_warn_keyexpired(clp->cl_hostname);
1596 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1614 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1597 * in nfs4_exchange_id */ 1615 * in nfs4_exchange_id */
1598 default: 1616 default:
1599 return; 1617 return;
1600 }
1601 } 1618 }
1602 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1619 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1603} 1620}
@@ -1607,7 +1624,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1607 int status = 0; 1624 int status = 0;
1608 1625
1609 /* Ensure exclusive access to NFSv4 state */ 1626 /* Ensure exclusive access to NFSv4 state */
1610 for(;;) { 1627 do {
1611 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1628 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
1612 /* We're going to have to re-establish a clientid */ 1629 /* We're going to have to re-establish a clientid */
1613 status = nfs4_reclaim_lease(clp); 1630 status = nfs4_reclaim_lease(clp);
@@ -1691,7 +1708,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1691 break; 1708 break;
1692 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 1709 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1693 break; 1710 break;
1694 } 1711 } while (atomic_read(&clp->cl_count) > 1);
1695 return; 1712 return;
1696out_error: 1713out_error:
1697 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1714 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dddfb5795d7b..c3ccd2c46834 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1452,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1452 1452
1453static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1453static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1454{ 1454{
1455 uint32_t attrs[2] = {0, 0}; 1455 uint32_t attrs[2] = {
1456 FATTR4_WORD0_RDATTR_ERROR,
1457 FATTR4_WORD1_MOUNTED_ON_FILEID,
1458 };
1456 uint32_t dircount = readdir->count >> 1; 1459 uint32_t dircount = readdir->count >> 1;
1457 __be32 *p; 1460 __be32 *p;
1458 1461
1459 if (readdir->plus) { 1462 if (readdir->plus) {
1460 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| 1463 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1461 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; 1464 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID;
1462 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| 1465 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1463 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| 1466 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1464 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| 1467 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1465 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 1468 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1466 dircount >>= 1; 1469 dircount >>= 1;
1467 } 1470 }
1468 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; 1471 /* Use mounted_on_fileid only if the server supports it */
1469 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; 1472 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
1470 /* Switch to mounted_on_fileid if the server supports it */ 1473 attrs[0] |= FATTR4_WORD0_FILEID;
1471 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1472 attrs[0] &= ~FATTR4_WORD0_FILEID;
1473 else
1474 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1475 1474
1476 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1475 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1477 *p++ = cpu_to_be32(OP_READDIR); 1476 *p++ = cpu_to_be32(OP_READDIR);
@@ -3140,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
3140 goto out_overflow; 3139 goto out_overflow;
3141 xdr_decode_hyper(p, fileid); 3140 xdr_decode_hyper(p, fileid);
3142 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 3141 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
3143 ret = NFS_ATTR_FATTR_FILEID; 3142 ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
3144 } 3143 }
3145 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 3144 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
3146 return ret; 3145 return ret;
@@ -4002,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4002{ 4001{
4003 int status; 4002 int status;
4004 umode_t fmode = 0; 4003 umode_t fmode = 0;
4005 uint64_t fileid;
4006 uint32_t type; 4004 uint32_t type;
4007 4005
4008 status = decode_attr_type(xdr, bitmap, &type); 4006 status = decode_attr_type(xdr, bitmap, &type);
@@ -4101,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
4101 goto xdr_error; 4099 goto xdr_error;
4102 fattr->valid |= status; 4100 fattr->valid |= status;
4103 4101
4104 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); 4102 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid);
4105 if (status < 0) 4103 if (status < 0)
4106 goto xdr_error; 4104 goto xdr_error;
4107 if (status != 0 && !(fattr->valid & status)) { 4105 fattr->valid |= status;
4108 fattr->fileid = fileid;
4109 fattr->valid |= status;
4110 }
4111 4106
4112xdr_error: 4107xdr_error:
4113 dprintk("%s: xdr returned %d\n", __func__, -status); 4108 dprintk("%s: xdr returned %d\n", __func__, -status);
@@ -4838,17 +4833,21 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4838 struct nfs4_secinfo_flavor *sec_flavor; 4833 struct nfs4_secinfo_flavor *sec_flavor;
4839 int status; 4834 int status;
4840 __be32 *p; 4835 __be32 *p;
4841 int i; 4836 int i, num_flavors;
4842 4837
4843 status = decode_op_hdr(xdr, OP_SECINFO); 4838 status = decode_op_hdr(xdr, OP_SECINFO);
4839 if (status)
4840 goto out;
4844 p = xdr_inline_decode(xdr, 4); 4841 p = xdr_inline_decode(xdr, 4);
4845 if (unlikely(!p)) 4842 if (unlikely(!p))
4846 goto out_overflow; 4843 goto out_overflow;
4847 res->flavors->num_flavors = be32_to_cpup(p);
4848 4844
4849 for (i = 0; i < res->flavors->num_flavors; i++) { 4845 res->flavors->num_flavors = 0;
4846 num_flavors = be32_to_cpup(p);
4847
4848 for (i = 0; i < num_flavors; i++) {
4850 sec_flavor = &res->flavors->flavors[i]; 4849 sec_flavor = &res->flavors->flavors[i];
4851 if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE) 4850 if ((char *)&sec_flavor[1] - (char *)res->flavors > PAGE_SIZE)
4852 break; 4851 break;
4853 4852
4854 p = xdr_inline_decode(xdr, 4); 4853 p = xdr_inline_decode(xdr, 4);
@@ -4857,13 +4856,15 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4857 sec_flavor->flavor = be32_to_cpup(p); 4856 sec_flavor->flavor = be32_to_cpup(p);
4858 4857
4859 if (sec_flavor->flavor == RPC_AUTH_GSS) { 4858 if (sec_flavor->flavor == RPC_AUTH_GSS) {
4860 if (decode_secinfo_gss(xdr, sec_flavor)) 4859 status = decode_secinfo_gss(xdr, sec_flavor);
4861 break; 4860 if (status)
4861 goto out;
4862 } 4862 }
4863 res->flavors->num_flavors++;
4863 } 4864 }
4864 4865
4865 return 0; 4866out:
4866 4867 return status;
4867out_overflow: 4868out_overflow:
4868 print_overflow_msg(__func__, xdr); 4869 print_overflow_msg(__func__, xdr);
4869 return -EIO; 4870 return -EIO;
@@ -6408,7 +6409,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6408 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, 6409 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6409 entry->server, 1) < 0) 6410 entry->server, 1) < 0)
6410 goto out_overflow; 6411 goto out_overflow;
6411 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6412 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
6413 entry->ino = entry->fattr->mounted_on_fileid;
6414 else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6412 entry->ino = entry->fattr->fileid; 6415 entry->ino = entry->fattr->fileid;
6413 6416
6414 entry->d_type = DT_UNKNOWN; 6417 entry->d_type = DT_UNKNOWN;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9ab97269ce6..ff681ab65d31 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1004,6 +1004,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1004{ 1004{
1005 struct nfs_inode *nfsi = NFS_I(wdata->inode); 1005 struct nfs_inode *nfsi = NFS_I(wdata->inode);
1006 loff_t end_pos = wdata->args.offset + wdata->res.count; 1006 loff_t end_pos = wdata->args.offset + wdata->res.count;
1007 bool mark_as_dirty = false;
1007 1008
1008 spin_lock(&nfsi->vfs_inode.i_lock); 1009 spin_lock(&nfsi->vfs_inode.i_lock);
1009 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { 1010 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
@@ -1011,13 +1012,18 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1011 get_lseg(wdata->lseg); 1012 get_lseg(wdata->lseg);
1012 wdata->lseg->pls_lc_cred = 1013 wdata->lseg->pls_lc_cred =
1013 get_rpccred(wdata->args.context->state->owner->so_cred); 1014 get_rpccred(wdata->args.context->state->owner->so_cred);
1014 mark_inode_dirty_sync(wdata->inode); 1015 mark_as_dirty = true;
1015 dprintk("%s: Set layoutcommit for inode %lu ", 1016 dprintk("%s: Set layoutcommit for inode %lu ",
1016 __func__, wdata->inode->i_ino); 1017 __func__, wdata->inode->i_ino);
1017 } 1018 }
1018 if (end_pos > wdata->lseg->pls_end_pos) 1019 if (end_pos > wdata->lseg->pls_end_pos)
1019 wdata->lseg->pls_end_pos = end_pos; 1020 wdata->lseg->pls_end_pos = end_pos;
1020 spin_unlock(&nfsi->vfs_inode.i_lock); 1021 spin_unlock(&nfsi->vfs_inode.i_lock);
1022
1023 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1024 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1025 if (mark_as_dirty)
1026 mark_inode_dirty_sync(wdata->inode);
1021} 1027}
1022EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); 1028EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
1023 1029
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2b8e9a5e366a..e288f06d3fa7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1004,6 +1004,7 @@ static int nfs_parse_security_flavors(char *value,
1004 return 0; 1004 return 0;
1005 } 1005 }
1006 1006
1007 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
1007 mnt->auth_flavor_len = 1; 1008 mnt->auth_flavor_len = 1;
1008 return 1; 1009 return 1;
1009} 1010}
@@ -1976,6 +1977,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
1976 if (error < 0) 1977 if (error < 0)
1977 goto out; 1978 goto out;
1978 1979
1980 /*
1981 * noac is a special case. It implies -o sync, but that's not
1982 * necessarily reflected in the mtab options. do_remount_sb
1983 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
1984 * remount options, so we have to explicitly reset it.
1985 */
1986 if (data->flags & NFS_MOUNT_NOAC)
1987 *flags |= MS_SYNCHRONOUS;
1988
1979 /* compare new mount options with old ones */ 1989 /* compare new mount options with old ones */
1980 error = nfs_compare_remount_data(nfss, data); 1990 error = nfs_compare_remount_data(nfss, data);
1981out: 1991out:
@@ -2235,8 +2245,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2235 if (!s->s_root) { 2245 if (!s->s_root) {
2236 /* initial superblock/root creation */ 2246 /* initial superblock/root creation */
2237 nfs_fill_super(s, data); 2247 nfs_fill_super(s, data);
2238 nfs_fscache_get_super_cookie( 2248 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2239 s, data ? data->fscache_uniq : NULL, NULL);
2240 } 2249 }
2241 2250
2242 mntroot = nfs_get_root(s, mntfh, dev_name); 2251 mntroot = nfs_get_root(s, mntfh, dev_name);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e4cbc11a74ab..3bd5d7e80f6c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -680,7 +680,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
680 req = nfs_setup_write_request(ctx, page, offset, count); 680 req = nfs_setup_write_request(ctx, page, offset, count);
681 if (IS_ERR(req)) 681 if (IS_ERR(req))
682 return PTR_ERR(req); 682 return PTR_ERR(req);
683 nfs_mark_request_dirty(req);
684 /* Update file length */ 683 /* Update file length */
685 nfs_grow_file(page, offset, count); 684 nfs_grow_file(page, offset, count);
686 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 685 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
@@ -1418,8 +1417,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1418 task->tk_pid, task->tk_status); 1417 task->tk_pid, task->tk_status);
1419 1418
1420 /* Call the NFS version-specific code */ 1419 /* Call the NFS version-specific code */
1421 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) 1420 NFS_PROTO(data->inode)->commit_done(task, data);
1422 return;
1423} 1421}
1424 1422
1425void nfs_commit_release_pages(struct nfs_write_data *data) 1423void nfs_commit_release_pages(struct nfs_write_data *data)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index aa309aa93fe8..4cf04e11c66c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -258,6 +258,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp)
258 if (atomic_dec_and_test(&fp->fi_delegees)) { 258 if (atomic_dec_and_test(&fp->fi_delegees)) {
259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); 259 vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
260 fp->fi_lease = NULL; 260 fp->fi_lease = NULL;
261 fput(fp->fi_deleg_file);
261 fp->fi_deleg_file = NULL; 262 fp->fi_deleg_file = NULL;
262 } 263 }
263} 264}
@@ -402,8 +403,8 @@ static void free_generic_stateid(struct nfs4_stateid *stp)
402 if (stp->st_access_bmap) { 403 if (stp->st_access_bmap) {
403 oflag = nfs4_access_bmap_to_omode(stp); 404 oflag = nfs4_access_bmap_to_omode(stp);
404 nfs4_file_put_access(stp->st_file, oflag); 405 nfs4_file_put_access(stp->st_file, oflag);
405 put_nfs4_file(stp->st_file);
406 } 406 }
407 put_nfs4_file(stp->st_file);
407 kmem_cache_free(stateid_slab, stp); 408 kmem_cache_free(stateid_slab, stp);
408} 409}
409 410
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2e1cebde90df..129f3c9f62d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1363,7 +1363,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1363 goto out; 1363 goto out;
1364 if (!(iap->ia_valid & ATTR_MODE)) 1364 if (!(iap->ia_valid & ATTR_MODE))
1365 iap->ia_mode = 0; 1365 iap->ia_mode = 0;
1366 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1366 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
1367 if (err) 1367 if (err)
1368 goto out; 1368 goto out;
1369 1369
@@ -1385,6 +1385,13 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1385 if (IS_ERR(dchild)) 1385 if (IS_ERR(dchild))
1386 goto out_nfserr; 1386 goto out_nfserr;
1387 1387
1388 /* If file doesn't exist, check for permissions to create one */
1389 if (!dchild->d_inode) {
1390 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1391 if (err)
1392 goto out;
1393 }
1394
1388 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); 1395 err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1389 if (err) 1396 if (err)
1390 goto out; 1397 goto out;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index b68f87a83924..938387a10d5d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1019,7 +1019,7 @@ struct ocfs2_xattr_entry {
1019 __le16 xe_name_offset; /* byte offset from the 1st entry in the 1019 __le16 xe_name_offset; /* byte offset from the 1st entry in the
1020 local xattr storage(inode, xattr block or 1020 local xattr storage(inode, xattr block or
1021 xattr bucket). */ 1021 xattr bucket). */
1022 __u8 xe_name_len; /* xattr name len, does't include prefix. */ 1022 __u8 xe_name_len; /* xattr name len, doesn't include prefix. */
1023 __u8 xe_type; /* the low 7 bits indicate the name prefix 1023 __u8 xe_type; /* the low 7 bits indicate the name prefix
1024 * type and the highest bit indicates whether 1024 * type and the highest bit indicates whether
1025 * the EA is stored in the local storage. */ 1025 * the EA is stored in the local storage. */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd6628d3ba42..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
3124/* for the /proc/ directory itself, after non-process stuff has been done */ 3124/* for the /proc/ directory itself, after non-process stuff has been done */
3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3126{ 3126{
3127 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3127 unsigned int nr;
3128 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 3128 struct task_struct *reaper;
3129 struct tgid_iter iter; 3129 struct tgid_iter iter;
3130 struct pid_namespace *ns; 3130 struct pid_namespace *ns;
3131 3131
3132 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3133 goto out_no_task;
3134 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3135
3136 reaper = get_proc_task(filp->f_path.dentry->d_inode);
3132 if (!reaper) 3137 if (!reaper)
3133 goto out_no_task; 3138 goto out_no_task;
3134 3139
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 4d0cb1241460..40fa780ebea7 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -175,26 +175,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
175} 175}
176 176
177/** 177/**
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
180 */
181void ubifs_create_buds_lists(struct ubifs_info *c)
182{
183 struct rb_node *p;
184
185 spin_lock(&c->buds_lock);
186 p = rb_first(&c->buds);
187 while (p) {
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
190
191 list_add_tail(&bud->list, &jhead->buds_list);
192 p = rb_next(p);
193 }
194 spin_unlock(&c->buds_lock);
195}
196
197/**
198 * ubifs_add_bud_to_log - add a new bud to the log. 178 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object 179 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to 180 * @jhead: journal head the bud belongs to
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 936f2cbfe6b6..3dbad6fbd1eb 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -317,6 +317,32 @@ int ubifs_recover_master_node(struct ubifs_info *c)
317 goto out_free; 317 goto out_free;
318 } 318 }
319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); 319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
320
321 /*
322 * We had to recover the master node, which means there was an
323 * unclean reboot. However, it is possible that the master node
324 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
325 * E.g., consider the following chain of events:
326 *
327 * 1. UBIFS was cleanly unmounted, so the master node is clean
328 * 2. UBIFS is being mounted R/W and starts changing the master
329 * node in the first (%UBIFS_MST_LNUM). A power cut happens,
330 * so this LEB ends up with some amount of garbage at the
331 * end.
332 * 3. UBIFS is being mounted R/O. We reach this place and
333 * recover the master node from the second LEB
334 * (%UBIFS_MST_LNUM + 1). But we cannot update the media
335 * because we are being mounted R/O. We have to defer the
336 * operation.
337 * 4. However, this master node (@c->mst_node) is marked as
338 * clean (since the step 1). And if we just return, the
339 * mount code will be confused and won't recover the master
340 * node when it is re-mounted R/W later.
341 *
342 * Thus, force the recovery by marking the master node as
343 * dirty.
344 */
345 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
320 } else { 346 } else {
321 /* Write the recovered master node */ 347 /* Write the recovered master node */
322 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; 348 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eed0fcff8d73..d3d6d365bfc1 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -59,6 +59,7 @@ enum {
59 * @new_size: truncation new size 59 * @new_size: truncation new size
60 * @free: amount of free space in a bud 60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes 61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 * @jhead: journal head number of the bud
62 * 63 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must 64 * UBIFS journal replay must compare node sequence numbers, which means it must
64 * build a tree of node information to insert into the TNC. 65 * build a tree of node information to insert into the TNC.
@@ -80,6 +81,7 @@ struct replay_entry {
80 struct { 81 struct {
81 int free; 82 int free;
82 int dirty; 83 int dirty;
84 int jhead;
83 }; 85 };
84 }; 86 };
85}; 87};
@@ -159,6 +161,11 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
159 err = PTR_ERR(lp); 161 err = PTR_ERR(lp);
160 goto out; 162 goto out;
161 } 163 }
164
165 /* Make sure the journal head points to the latest bud */
166 err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum,
167 c->leb_size - r->free, UBI_SHORTTERM);
168
162out: 169out:
163 ubifs_release_lprops(c); 170 ubifs_release_lprops(c);
164 return err; 171 return err;
@@ -627,10 +634,6 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
627 ubifs_assert(sleb->endpt - offs >= used); 634 ubifs_assert(sleb->endpt - offs >= used);
628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 635 ubifs_assert(sleb->endpt % c->min_io_size == 0);
629 636
630 if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount)
631 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
632 sleb->endpt, UBI_SHORTTERM);
633
634 *dirty = sleb->endpt - offs - used; 637 *dirty = sleb->endpt - offs - used;
635 *free = c->leb_size - sleb->endpt; 638 *free = c->leb_size - sleb->endpt;
636 639
@@ -653,12 +656,14 @@ out_dump:
653 * @sqnum: sequence number 656 * @sqnum: sequence number
654 * @free: amount of free space in bud 657 * @free: amount of free space in bud
655 * @dirty: amount of dirty space from padding and deletion nodes 658 * @dirty: amount of dirty space from padding and deletion nodes
659 * @jhead: journal head number for the bud
656 * 660 *
657 * This function inserts a reference node to the replay tree and returns zero 661 * This function inserts a reference node to the replay tree and returns zero
658 * in case of success or a negative error code in case of failure. 662 * in case of success or a negative error code in case of failure.
659 */ 663 */
660static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 664static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
661 unsigned long long sqnum, int free, int dirty) 665 unsigned long long sqnum, int free, int dirty,
666 int jhead)
662{ 667{
663 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 668 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
664 struct replay_entry *r; 669 struct replay_entry *r;
@@ -688,6 +693,7 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
688 r->flags = REPLAY_REF; 693 r->flags = REPLAY_REF;
689 r->free = free; 694 r->free = free;
690 r->dirty = dirty; 695 r->dirty = dirty;
696 r->jhead = jhead;
691 697
692 rb_link_node(&r->rb, parent, p); 698 rb_link_node(&r->rb, parent, p);
693 rb_insert_color(&r->rb, &c->replay_tree); 699 rb_insert_color(&r->rb, &c->replay_tree);
@@ -712,7 +718,7 @@ static int replay_buds(struct ubifs_info *c)
712 if (err) 718 if (err)
713 return err; 719 return err;
714 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 720 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
715 free, dirty); 721 free, dirty, b->bud->jhead);
716 if (err) 722 if (err)
717 return err; 723 return err;
718 } 724 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c75f6133206c..04ad07f4fcc3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1257,12 +1257,12 @@ static int mount_ubifs(struct ubifs_info *c)
1257 goto out_free; 1257 goto out_free;
1258 } 1258 }
1259 1259
1260 err = alloc_wbufs(c);
1261 if (err)
1262 goto out_cbuf;
1263
1260 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); 1264 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1261 if (!c->ro_mount) { 1265 if (!c->ro_mount) {
1262 err = alloc_wbufs(c);
1263 if (err)
1264 goto out_cbuf;
1265
1266 /* Create background thread */ 1266 /* Create background thread */
1267 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1267 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1268 if (IS_ERR(c->bgt)) { 1268 if (IS_ERR(c->bgt)) {
@@ -1631,12 +1631,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1631 if (err) 1631 if (err)
1632 goto out; 1632 goto out;
1633 1633
1634 err = alloc_wbufs(c);
1635 if (err)
1636 goto out;
1637
1638 ubifs_create_buds_lists(c);
1639
1640 /* Create background thread */ 1634 /* Create background thread */
1641 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1635 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1642 if (IS_ERR(c->bgt)) { 1636 if (IS_ERR(c->bgt)) {
@@ -1671,14 +1665,25 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1671 if (err) 1665 if (err)
1672 goto out; 1666 goto out;
1673 1667
1668 dbg_gen("re-mounted read-write");
1669 c->remounting_rw = 0;
1670
1674 if (c->need_recovery) { 1671 if (c->need_recovery) {
1675 c->need_recovery = 0; 1672 c->need_recovery = 0;
1676 ubifs_msg("deferred recovery completed"); 1673 ubifs_msg("deferred recovery completed");
1674 } else {
1675 /*
1676 * Do not run the debugging space check if we were doing
1677 * recovery, because when we saved the information we had the
1678 * file-system in a state where the TNC and lprops have been
1679 * modified in memory, but all the I/O operations (including a
1680 * commit) were deferred. So the file-system was in
1681 * "non-committed" state. Now the file-system is in committed
1682 * state, and of course the amount of free space will change
1683 * because, for example, the old index size was imprecise.
1684 */
1685 err = dbg_check_space_info(c);
1677 } 1686 }
1678
1679 dbg_gen("re-mounted read-write");
1680 c->remounting_rw = 0;
1681 err = dbg_check_space_info(c);
1682 mutex_unlock(&c->umount_mutex); 1687 mutex_unlock(&c->umount_mutex);
1683 return err; 1688 return err;
1684 1689
@@ -1733,7 +1738,6 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1733 if (err) 1738 if (err)
1734 ubifs_ro_mode(c, err); 1739 ubifs_ro_mode(c, err);
1735 1740
1736 free_wbufs(c);
1737 vfree(c->orph_buf); 1741 vfree(c->orph_buf);
1738 c->orph_buf = NULL; 1742 c->orph_buf = NULL;
1739 kfree(c->write_reserve_buf); 1743 kfree(c->write_reserve_buf);
@@ -1761,10 +1765,12 @@ static void ubifs_put_super(struct super_block *sb)
1761 * of the media. For example, there will be dirty inodes if we failed 1765 * of the media. For example, there will be dirty inodes if we failed
1762 * to write them back because of I/O errors. 1766 * to write them back because of I/O errors.
1763 */ 1767 */
1764 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1768 if (!c->ro_error) {
1765 ubifs_assert(c->budg_idx_growth == 0); 1769 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1766 ubifs_assert(c->budg_dd_growth == 0); 1770 ubifs_assert(c->budg_idx_growth == 0);
1767 ubifs_assert(c->budg_data_growth == 0); 1771 ubifs_assert(c->budg_dd_growth == 0);
1772 ubifs_assert(c->budg_data_growth == 0);
1773 }
1768 1774
1769 /* 1775 /*
1770 * The 'c->umount_lock' prevents races between UBIFS memory shrinker 1776 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
diff --git a/fs/xattr.c b/fs/xattr.c
index a19acdb81cd1..f1ef94974dea 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -666,7 +666,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); 666 handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
667 if (!handler) 667 if (!handler)
668 return -EOPNOTSUPP; 668 return -EOPNOTSUPP;
669 return handler->set(dentry, name, value, size, 0, handler->flags); 669 return handler->set(dentry, name, value, size, flags, handler->flags);
670} 670}
671 671
672/* 672/*
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 3ca795609113..9f76cceb678d 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -34,8 +34,10 @@ __xfs_printk(
34 const struct xfs_mount *mp, 34 const struct xfs_mount *mp,
35 struct va_format *vaf) 35 struct va_format *vaf)
36{ 36{
37 if (mp && mp->m_fsname) 37 if (mp && mp->m_fsname) {
38 printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); 38 printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
39 return;
40 }
39 printk("%sXFS: %pV\n", level, vaf); 41 printk("%sXFS: %pV\n", level, vaf);
40} 42}
41 43