aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJames Morris <jmorris@namei.org>2011-04-19 07:32:41 -0400
committerJames Morris <jmorris@namei.org>2011-04-19 07:32:41 -0400
commitd4ab4e6a23f805abb8fc3cc34525eec3788aeca1 (patch)
treeeefd82c155bc27469a85667d759cd90facf4a6e3 /fs/btrfs
parentc0fa797ae6cd02ff87c0bfe0d509368a3b45640e (diff)
parent96fd2d57b8252e16dfacf8941f7a74a6119197f5 (diff)
Merge branch 'master'; commit 'v2.6.39-rc3' into next
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c9
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/extent-tree.c125
-rw-r--r--fs/btrfs/extent_io.c82
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file.c23
-rw-r--r--fs/btrfs/free-space-cache.c203
-rw-r--r--fs/btrfs/inode.c196
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/root-tree.c18
-rw-r--r--fs/btrfs/super.c61
-rw-r--r--fs/btrfs/transaction.c50
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/xattr.c33
17 files changed, 578 insertions, 261 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
178 178
179 if (value) { 179 if (value) {
180 acl = posix_acl_from_xattr(value, size); 180 acl = posix_acl_from_xattr(value, size);
181 if (acl == NULL) { 181 if (acl) {
182 value = NULL; 182 ret = posix_acl_valid(acl);
183 size = 0; 183 if (ret)
184 goto out;
184 } else if (IS_ERR(acl)) { 185 } else if (IS_ERR(acl)) {
185 return PTR_ERR(acl); 186 return PTR_ERR(acl);
186 } 187 }
187 } 188 }
188 189
189 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type); 190 ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
190 191out:
191 posix_acl_release(acl); 192 posix_acl_release(acl);
192 193
193 return ret; 194 return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d47ce8307854..2e61fe1b6b8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -740,8 +740,10 @@ struct btrfs_space_info {
740 */ 740 */
741 unsigned long reservation_progress; 741 unsigned long reservation_progress;
742 742
743 int full; /* indicates that we cannot allocate any more 743 int full:1; /* indicates that we cannot allocate any more
744 chunks for this space */ 744 chunks for this space */
745 int chunk_alloc:1; /* set if we are allocating a chunk */
746
745 int force_alloc; /* set if we need to force a chunk alloc for 747 int force_alloc; /* set if we need to force a chunk alloc for
746 this space */ 748 this space */
747 749
@@ -1284,6 +1286,8 @@ struct btrfs_root {
1284#define BTRFS_INODE_DIRSYNC (1 << 10) 1286#define BTRFS_INODE_DIRSYNC (1 << 10)
1285#define BTRFS_INODE_COMPRESS (1 << 11) 1287#define BTRFS_INODE_COMPRESS (1 << 11)
1286 1288
1289#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
1290
1287/* some macros to generate set/get funcs for the struct fields. This 1291/* some macros to generate set/get funcs for the struct fields. This
1288 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1292 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
1289 * one for u8: 1293 * one for u8:
@@ -2359,6 +2363,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2359int btrfs_find_orphan_roots(struct btrfs_root *tree_root); 2363int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
2360int btrfs_set_root_node(struct btrfs_root_item *item, 2364int btrfs_set_root_node(struct btrfs_root_item *item,
2361 struct extent_buffer *node); 2365 struct extent_buffer *node);
2366void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
2367
2362/* dir-item.c */ 2368/* dir-item.c */
2363int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2369int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2364 struct btrfs_root *root, const char *name, 2370 struct btrfs_root *root, const char *name,
@@ -2572,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2572int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2578int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2573 struct inode *inode, u64 start, u64 end); 2579 struct inode *inode, u64 start, u64 end);
2574int btrfs_release_file(struct inode *inode, struct file *file); 2580int btrfs_release_file(struct inode *inode, struct file *file);
2581void btrfs_drop_pages(struct page **pages, size_t num_pages);
2582int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
2583 struct page **pages, size_t num_pages,
2584 loff_t pos, size_t write_bytes,
2585 struct extent_state **cached);
2575 2586
2576/* tree-defrag.c */ 2587/* tree-defrag.c */
2577int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 2588int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d7a7315bd031..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1275,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1275 root->commit_root = btrfs_root_node(root); 1275 root->commit_root = btrfs_root_node(root);
1276 BUG_ON(!root->node); 1276 BUG_ON(!root->node);
1277out: 1277out:
1278 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) 1278 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
1279 root->ref_cows = 1; 1279 root->ref_cows = 1;
1280 btrfs_check_and_init_root_item(&root->root_item);
1281 }
1280 1282
1281 return root; 1283 return root;
1282} 1284}
@@ -3055,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3055 btrfs_destroy_pinned_extent(root, 3057 btrfs_destroy_pinned_extent(root,
3056 root->fs_info->pinned_extents); 3058 root->fs_info->pinned_extents);
3057 3059
3058 t->use_count = 0; 3060 atomic_set(&t->use_count, 0);
3059 list_del_init(&t->list); 3061 list_del_init(&t->list);
3060 memset(t, 0, sizeof(*t)); 3062 memset(t, 0, sizeof(*t));
3061 kmem_cache_free(btrfs_transaction_cachep, t); 3063 kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..31f33ba56fe8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
33#include "locking.h" 33#include "locking.h"
34#include "free-space-cache.h" 34#include "free-space-cache.h"
35 35
36/* control flags for do_chunk_alloc's force field
37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
38 * if we really need one.
39 *
40 * CHUNK_ALLOC_FORCE means it must try to allocate one
41 *
42 * CHUNK_ALLOC_LIMITED means to only try and allocate one
43 * if we have very few chunks already allocated. This is
44 * used as part of the clustering code to help make sure
45 * we have a good pool of storage to cluster in, without
46 * filling the FS with empty chunks
47 *
48 */
49enum {
50 CHUNK_ALLOC_NO_FORCE = 0,
51 CHUNK_ALLOC_FORCE = 1,
52 CHUNK_ALLOC_LIMITED = 2,
53};
54
36static int update_block_group(struct btrfs_trans_handle *trans, 55static int update_block_group(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 56 struct btrfs_root *root,
38 u64 bytenr, u64 num_bytes, int alloc); 57 u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3019 found->bytes_readonly = 0; 3038 found->bytes_readonly = 0;
3020 found->bytes_may_use = 0; 3039 found->bytes_may_use = 0;
3021 found->full = 0; 3040 found->full = 0;
3022 found->force_alloc = 0; 3041 found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3042 found->chunk_alloc = 0;
3023 *space_info = found; 3043 *space_info = found;
3024 list_add_rcu(&found->list, &info->space_info); 3044 list_add_rcu(&found->list, &info->space_info);
3025 atomic_set(&found->caching_threads, 0); 3045 atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
3150 if (!data_sinfo->full && alloc_chunk) { 3170 if (!data_sinfo->full && alloc_chunk) {
3151 u64 alloc_target; 3171 u64 alloc_target;
3152 3172
3153 data_sinfo->force_alloc = 1; 3173 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3154 spin_unlock(&data_sinfo->lock); 3174 spin_unlock(&data_sinfo->lock);
3155alloc: 3175alloc:
3156 alloc_target = btrfs_get_alloc_profile(root, 1); 3176 alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
3160 3180
3161 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3181 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3162 bytes + 2 * 1024 * 1024, 3182 bytes + 2 * 1024 * 1024,
3163 alloc_target, 0); 3183 alloc_target,
3184 CHUNK_ALLOC_NO_FORCE);
3164 btrfs_end_transaction(trans, root); 3185 btrfs_end_transaction(trans, root);
3165 if (ret < 0) { 3186 if (ret < 0) {
3166 if (ret != -ENOSPC) 3187 if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
3239 rcu_read_lock(); 3260 rcu_read_lock();
3240 list_for_each_entry_rcu(found, head, list) { 3261 list_for_each_entry_rcu(found, head, list) {
3241 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3262 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3242 found->force_alloc = 1; 3263 found->force_alloc = CHUNK_ALLOC_FORCE;
3243 } 3264 }
3244 rcu_read_unlock(); 3265 rcu_read_unlock();
3245} 3266}
3246 3267
3247static int should_alloc_chunk(struct btrfs_root *root, 3268static int should_alloc_chunk(struct btrfs_root *root,
3248 struct btrfs_space_info *sinfo, u64 alloc_bytes) 3269 struct btrfs_space_info *sinfo, u64 alloc_bytes,
3270 int force)
3249{ 3271{
3250 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3272 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3273 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3251 u64 thresh; 3274 u64 thresh;
3252 3275
3253 if (sinfo->bytes_used + sinfo->bytes_reserved + 3276 if (force == CHUNK_ALLOC_FORCE)
3254 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3277 return 1;
3278
3279 /*
3280 * in limited mode, we want to have some free space up to
3281 * about 1% of the FS size.
3282 */
3283 if (force == CHUNK_ALLOC_LIMITED) {
3284 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3285 thresh = max_t(u64, 64 * 1024 * 1024,
3286 div_factor_fine(thresh, 1));
3287
3288 if (num_bytes - num_allocated < thresh)
3289 return 1;
3290 }
3291
3292 /*
3293 * we have two similar checks here, one based on percentage
3294 * and once based on a hard number of 256MB. The idea
3295 * is that if we have a good amount of free
3296 * room, don't allocate a chunk. A good mount is
3297 * less than 80% utilized of the chunks we have allocated,
3298 * or more than 256MB free
3299 */
3300 if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
3255 return 0; 3301 return 0;
3256 3302
3257 if (sinfo->bytes_used + sinfo->bytes_reserved + 3303 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
3258 alloc_bytes < div_factor(num_bytes, 8))
3259 return 0; 3304 return 0;
3260 3305
3261 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3306 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3307
3308 /* 256MB or 5% of the FS */
3262 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3309 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3263 3310
3264 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3311 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3265 return 0; 3312 return 0;
3266
3267 return 1; 3313 return 1;
3268} 3314}
3269 3315
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3273{ 3319{
3274 struct btrfs_space_info *space_info; 3320 struct btrfs_space_info *space_info;
3275 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3321 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3322 int wait_for_alloc = 0;
3276 int ret = 0; 3323 int ret = 0;
3277 3324
3278 mutex_lock(&fs_info->chunk_mutex);
3279
3280 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3325 flags = btrfs_reduce_alloc_profile(extent_root, flags);
3281 3326
3282 space_info = __find_space_info(extent_root->fs_info, flags); 3327 space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3287 } 3332 }
3288 BUG_ON(!space_info); 3333 BUG_ON(!space_info);
3289 3334
3335again:
3290 spin_lock(&space_info->lock); 3336 spin_lock(&space_info->lock);
3291 if (space_info->force_alloc) 3337 if (space_info->force_alloc)
3292 force = 1; 3338 force = space_info->force_alloc;
3293 if (space_info->full) { 3339 if (space_info->full) {
3294 spin_unlock(&space_info->lock); 3340 spin_unlock(&space_info->lock);
3295 goto out; 3341 return 0;
3296 } 3342 }
3297 3343
3298 if (!force && !should_alloc_chunk(extent_root, space_info, 3344 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
3299 alloc_bytes)) {
3300 spin_unlock(&space_info->lock); 3345 spin_unlock(&space_info->lock);
3301 goto out; 3346 return 0;
3347 } else if (space_info->chunk_alloc) {
3348 wait_for_alloc = 1;
3349 } else {
3350 space_info->chunk_alloc = 1;
3302 } 3351 }
3352
3303 spin_unlock(&space_info->lock); 3353 spin_unlock(&space_info->lock);
3304 3354
3355 mutex_lock(&fs_info->chunk_mutex);
3356
3357 /*
3358 * The chunk_mutex is held throughout the entirety of a chunk
3359 * allocation, so once we've acquired the chunk_mutex we know that the
3360 * other guy is done and we need to recheck and see if we should
3361 * allocate.
3362 */
3363 if (wait_for_alloc) {
3364 mutex_unlock(&fs_info->chunk_mutex);
3365 wait_for_alloc = 0;
3366 goto again;
3367 }
3368
3305 /* 3369 /*
3306 * If we have mixed data/metadata chunks we want to make sure we keep 3370 * If we have mixed data/metadata chunks we want to make sure we keep
3307 * allocating mixed chunks instead of individual chunks. 3371 * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3327 space_info->full = 1; 3391 space_info->full = 1;
3328 else 3392 else
3329 ret = 1; 3393 ret = 1;
3330 space_info->force_alloc = 0; 3394
3395 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3396 space_info->chunk_alloc = 0;
3331 spin_unlock(&space_info->lock); 3397 spin_unlock(&space_info->lock);
3332out:
3333 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3398 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3334 return ret; 3399 return ret;
3335} 3400}
@@ -5303,11 +5368,13 @@ loop:
5303 5368
5304 if (allowed_chunk_alloc) { 5369 if (allowed_chunk_alloc) {
5305 ret = do_chunk_alloc(trans, root, num_bytes + 5370 ret = do_chunk_alloc(trans, root, num_bytes +
5306 2 * 1024 * 1024, data, 1); 5371 2 * 1024 * 1024, data,
5372 CHUNK_ALLOC_LIMITED);
5307 allowed_chunk_alloc = 0; 5373 allowed_chunk_alloc = 0;
5308 done_chunk_alloc = 1; 5374 done_chunk_alloc = 1;
5309 } else if (!done_chunk_alloc) { 5375 } else if (!done_chunk_alloc &&
5310 space_info->force_alloc = 1; 5376 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5377 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5311 } 5378 }
5312 5379
5313 if (loop < LOOP_NO_EMPTY_SIZE) { 5380 if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
5393 */ 5460 */
5394 if (empty_size || root->ref_cows) 5461 if (empty_size || root->ref_cows)
5395 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5462 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5396 num_bytes + 2 * 1024 * 1024, data, 0); 5463 num_bytes + 2 * 1024 * 1024, data,
5464 CHUNK_ALLOC_NO_FORCE);
5397 5465
5398 WARN_ON(num_bytes < root->sectorsize); 5466 WARN_ON(num_bytes < root->sectorsize);
5399 ret = find_free_extent(trans, root, num_bytes, empty_size, 5467 ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
5405 num_bytes = num_bytes & ~(root->sectorsize - 1); 5473 num_bytes = num_bytes & ~(root->sectorsize - 1);
5406 num_bytes = max(num_bytes, min_alloc_size); 5474 num_bytes = max(num_bytes, min_alloc_size);
5407 do_chunk_alloc(trans, root->fs_info->extent_root, 5475 do_chunk_alloc(trans, root->fs_info->extent_root,
5408 num_bytes, data, 1); 5476 num_bytes, data, CHUNK_ALLOC_FORCE);
5409 goto again; 5477 goto again;
5410 } 5478 }
5411 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5479 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8109 8177
8110 alloc_flags = update_block_group_flags(root, cache->flags); 8178 alloc_flags = update_block_group_flags(root, cache->flags);
8111 if (alloc_flags != cache->flags) 8179 if (alloc_flags != cache->flags)
8112 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8180 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8181 CHUNK_ALLOC_FORCE);
8113 8182
8114 ret = set_block_group_ro(cache); 8183 ret = set_block_group_ro(cache);
8115 if (!ret) 8184 if (!ret)
8116 goto out; 8185 goto out;
8117 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 8186 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8118 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8187 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8188 CHUNK_ALLOC_FORCE);
8119 if (ret < 0) 8189 if (ret < 0)
8120 goto out; 8190 goto out;
8121 ret = set_block_group_ro(cache); 8191 ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8128 struct btrfs_root *root, u64 type) 8198 struct btrfs_root *root, u64 type)
8129{ 8199{
8130 u64 alloc_flags = get_alloc_profile(root, type); 8200 u64 alloc_flags = get_alloc_profile(root, type);
8131 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); 8201 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8202 CHUNK_ALLOC_FORCE);
8132} 8203}
8133 8204
8134/* 8205/*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..315138605088 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
690 } 690 }
691} 691}
692 692
693static void uncache_state(struct extent_state **cached_ptr)
694{
695 if (cached_ptr && (*cached_ptr)) {
696 struct extent_state *state = *cached_ptr;
697 *cached_ptr = NULL;
698 free_extent_state(state);
699 }
700}
701
693/* 702/*
694 * set some bits on a range in the tree. This may require allocations or 703 * set some bits on a range in the tree. This may require allocations or
695 * sleeping, so the gfp mask is used to indicate what is allowed. 704 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
940} 949}
941 950
942int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 951int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
943 gfp_t mask) 952 struct extent_state **cached_state, gfp_t mask)
944{ 953{
945 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 954 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
946 NULL, mask); 955 NULL, cached_state, mask);
947} 956}
948 957
949static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 958static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1012 mask); 1021 mask);
1013} 1022}
1014 1023
1015int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1024int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1016 gfp_t mask)
1017{ 1025{
1018 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1026 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1019 mask); 1027 mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1735 1743
1736 do { 1744 do {
1737 struct page *page = bvec->bv_page; 1745 struct page *page = bvec->bv_page;
1746 struct extent_state *cached = NULL;
1747 struct extent_state *state;
1748
1738 tree = &BTRFS_I(page->mapping->host)->io_tree; 1749 tree = &BTRFS_I(page->mapping->host)->io_tree;
1739 1750
1740 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1751 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1749 if (++bvec <= bvec_end) 1760 if (++bvec <= bvec_end)
1750 prefetchw(&bvec->bv_page->flags); 1761 prefetchw(&bvec->bv_page->flags);
1751 1762
1763 spin_lock(&tree->lock);
1764 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
1765 if (state && state->start == start) {
1766 /*
1767 * take a reference on the state, unlock will drop
1768 * the ref
1769 */
1770 cache_state(state, &cached);
1771 }
1772 spin_unlock(&tree->lock);
1773
1752 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1774 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1753 ret = tree->ops->readpage_end_io_hook(page, start, end, 1775 ret = tree->ops->readpage_end_io_hook(page, start, end,
1754 NULL); 1776 state);
1755 if (ret) 1777 if (ret)
1756 uptodate = 0; 1778 uptodate = 0;
1757 } 1779 }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1764 test_bit(BIO_UPTODATE, &bio->bi_flags); 1786 test_bit(BIO_UPTODATE, &bio->bi_flags);
1765 if (err) 1787 if (err)
1766 uptodate = 0; 1788 uptodate = 0;
1789 uncache_state(&cached);
1767 continue; 1790 continue;
1768 } 1791 }
1769 } 1792 }
1770 1793
1771 if (uptodate) { 1794 if (uptodate) {
1772 set_extent_uptodate(tree, start, end, 1795 set_extent_uptodate(tree, start, end, &cached,
1773 GFP_ATOMIC); 1796 GFP_ATOMIC);
1774 } 1797 }
1775 unlock_extent(tree, start, end, GFP_ATOMIC); 1798 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1776 1799
1777 if (whole_page) { 1800 if (whole_page) {
1778 if (uptodate) { 1801 if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1811 1834
1812 do { 1835 do {
1813 struct page *page = bvec->bv_page; 1836 struct page *page = bvec->bv_page;
1837 struct extent_state *cached = NULL;
1814 tree = &BTRFS_I(page->mapping->host)->io_tree; 1838 tree = &BTRFS_I(page->mapping->host)->io_tree;
1815 1839
1816 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1840 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1821 prefetchw(&bvec->bv_page->flags); 1845 prefetchw(&bvec->bv_page->flags);
1822 1846
1823 if (uptodate) { 1847 if (uptodate) {
1824 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1848 set_extent_uptodate(tree, start, end, &cached,
1849 GFP_ATOMIC);
1825 } else { 1850 } else {
1826 ClearPageUptodate(page); 1851 ClearPageUptodate(page);
1827 SetPageError(page); 1852 SetPageError(page);
1828 } 1853 }
1829 1854
1830 unlock_extent(tree, start, end, GFP_ATOMIC); 1855 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1831 1856
1832 } while (bvec >= bio->bi_io_vec); 1857 } while (bvec >= bio->bi_io_vec);
1833 1858
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2016 while (cur <= end) { 2041 while (cur <= end) {
2017 if (cur >= last_byte) { 2042 if (cur >= last_byte) {
2018 char *userpage; 2043 char *userpage;
2044 struct extent_state *cached = NULL;
2045
2019 iosize = PAGE_CACHE_SIZE - page_offset; 2046 iosize = PAGE_CACHE_SIZE - page_offset;
2020 userpage = kmap_atomic(page, KM_USER0); 2047 userpage = kmap_atomic(page, KM_USER0);
2021 memset(userpage + page_offset, 0, iosize); 2048 memset(userpage + page_offset, 0, iosize);
2022 flush_dcache_page(page); 2049 flush_dcache_page(page);
2023 kunmap_atomic(userpage, KM_USER0); 2050 kunmap_atomic(userpage, KM_USER0);
2024 set_extent_uptodate(tree, cur, cur + iosize - 1, 2051 set_extent_uptodate(tree, cur, cur + iosize - 1,
2025 GFP_NOFS); 2052 &cached, GFP_NOFS);
2026 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2053 unlock_extent_cached(tree, cur, cur + iosize - 1,
2054 &cached, GFP_NOFS);
2027 break; 2055 break;
2028 } 2056 }
2029 em = get_extent(inode, page, page_offset, cur, 2057 em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2063 /* we've found a hole, just zero and go on */ 2091 /* we've found a hole, just zero and go on */
2064 if (block_start == EXTENT_MAP_HOLE) { 2092 if (block_start == EXTENT_MAP_HOLE) {
2065 char *userpage; 2093 char *userpage;
2094 struct extent_state *cached = NULL;
2095
2066 userpage = kmap_atomic(page, KM_USER0); 2096 userpage = kmap_atomic(page, KM_USER0);
2067 memset(userpage + page_offset, 0, iosize); 2097 memset(userpage + page_offset, 0, iosize);
2068 flush_dcache_page(page); 2098 flush_dcache_page(page);
2069 kunmap_atomic(userpage, KM_USER0); 2099 kunmap_atomic(userpage, KM_USER0);
2070 2100
2071 set_extent_uptodate(tree, cur, cur + iosize - 1, 2101 set_extent_uptodate(tree, cur, cur + iosize - 1,
2072 GFP_NOFS); 2102 &cached, GFP_NOFS);
2073 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2103 unlock_extent_cached(tree, cur, cur + iosize - 1,
2104 &cached, GFP_NOFS);
2074 cur = cur + iosize; 2105 cur = cur + iosize;
2075 page_offset += iosize; 2106 page_offset += iosize;
2076 continue; 2107 continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
2789 iocount++; 2820 iocount++;
2790 block_start = block_start + iosize; 2821 block_start = block_start + iosize;
2791 } else { 2822 } else {
2792 set_extent_uptodate(tree, block_start, cur_end, 2823 struct extent_state *cached = NULL;
2824
2825 set_extent_uptodate(tree, block_start, cur_end, &cached,
2793 GFP_NOFS); 2826 GFP_NOFS);
2794 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2827 unlock_extent_cached(tree, block_start, cur_end,
2828 &cached, GFP_NOFS);
2795 block_start = cur_end + 1; 2829 block_start = cur_end + 1;
2796 } 2830 }
2797 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2831 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3457 num_pages = num_extent_pages(eb->start, eb->len); 3491 num_pages = num_extent_pages(eb->start, eb->len);
3458 3492
3459 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3493 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3460 GFP_NOFS); 3494 NULL, GFP_NOFS);
3461 for (i = 0; i < num_pages; i++) { 3495 for (i = 0; i < num_pages; i++) {
3462 page = extent_buffer_page(eb, i); 3496 page = extent_buffer_page(eb, i);
3463 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3497 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
3885 kunmap_atomic(dst_kaddr, KM_USER0); 3919 kunmap_atomic(dst_kaddr, KM_USER0);
3886} 3920}
3887 3921
3922static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
3923{
3924 unsigned long distance = (src > dst) ? src - dst : dst - src;
3925 return distance < len;
3926}
3927
3888static void copy_pages(struct page *dst_page, struct page *src_page, 3928static void copy_pages(struct page *dst_page, struct page *src_page,
3889 unsigned long dst_off, unsigned long src_off, 3929 unsigned long dst_off, unsigned long src_off,
3890 unsigned long len) 3930 unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
3892 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3932 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3893 char *src_kaddr; 3933 char *src_kaddr;
3894 3934
3895 if (dst_page != src_page) 3935 if (dst_page != src_page) {
3896 src_kaddr = kmap_atomic(src_page, KM_USER1); 3936 src_kaddr = kmap_atomic(src_page, KM_USER1);
3897 else 3937 } else {
3898 src_kaddr = dst_kaddr; 3938 src_kaddr = dst_kaddr;
3939 BUG_ON(areas_overlap(src_off, dst_off, len));
3940 }
3899 3941
3900 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3942 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3901 kunmap_atomic(dst_kaddr, KM_USER0); 3943 kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3970 "len %lu len %lu\n", dst_offset, len, dst->len); 4012 "len %lu len %lu\n", dst_offset, len, dst->len);
3971 BUG_ON(1); 4013 BUG_ON(1);
3972 } 4014 }
3973 if (dst_offset < src_offset) { 4015 if (!areas_overlap(src_offset, dst_offset, len)) {
3974 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4016 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3975 return; 4017 return;
3976 } 4018 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
208 int bits, int exclusive_bits, u64 *failed_start, 208 int bits, int exclusive_bits, u64 *failed_start,
209 struct extent_state **cached_state, gfp_t mask); 209 struct extent_state **cached_state, gfp_t mask);
210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
211 gfp_t mask); 211 struct extent_state **cached_state, gfp_t mask);
212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
213 gfp_t mask); 213 gfp_t mask);
214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2b6c12e983b3..a24a3f2fa13e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -243,7 +243,7 @@ out:
243 * Insert @em into @tree or perform a simple forward/backward merge with 243 * Insert @em into @tree or perform a simple forward/backward merge with
244 * existing mappings. The extent_map struct passed in will be inserted 244 * existing mappings. The extent_map struct passed in will be inserted
245 * into the tree directly, with an additional reference taken, or a 245 * into the tree directly, with an additional reference taken, or a
246 * reference dropped if the merge attempt was successfull. 246 * reference dropped if the merge attempt was successful.
247 */ 247 */
248int add_extent_mapping(struct extent_map_tree *tree, 248int add_extent_mapping(struct extent_map_tree *tree,
249 struct extent_map *em) 249 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 656bc0a892b1..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
104/* 104/*
105 * unlocks pages after btrfs_file_write is done with them 105 * unlocks pages after btrfs_file_write is done with them
106 */ 106 */
107static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) 107void btrfs_drop_pages(struct page **pages, size_t num_pages)
108{ 108{
109 size_t i; 109 size_t i;
110 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
127 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
128 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
129 */ 129 */
130static noinline int dirty_and_release_pages(struct btrfs_root *root, 130int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
131 struct file *file, 131 struct page **pages, size_t num_pages,
132 struct page **pages, 132 loff_t pos, size_t write_bytes,
133 size_t num_pages, 133 struct extent_state **cached)
134 loff_t pos,
135 size_t write_bytes)
136{ 134{
137 int err = 0; 135 int err = 0;
138 int i; 136 int i;
139 struct inode *inode = fdentry(file)->d_inode;
140 u64 num_bytes; 137 u64 num_bytes;
141 u64 start_pos; 138 u64 start_pos;
142 u64 end_of_last_block; 139 u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
149 146
150 end_of_last_block = start_pos + num_bytes - 1; 147 end_of_last_block = start_pos + num_bytes - 1;
151 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 148 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
152 NULL); 149 cached);
153 if (err) 150 if (err)
154 return err; 151 return err;
155 152
@@ -906,7 +903,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
906 unsigned long last_index; 903 unsigned long last_index;
907 size_t num_written = 0; 904 size_t num_written = 0;
908 int nrptrs; 905 int nrptrs;
909 int ret; 906 int ret = 0;
910 907
911 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 908 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
912 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 909 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
992 } 989 }
993 990
994 if (copied > 0) { 991 if (copied > 0) {
995 ret = dirty_and_release_pages(root, file, pages, 992 ret = btrfs_dirty_pages(root, inode, pages,
996 dirty_pages, pos, 993 dirty_pages, pos, copied,
997 copied); 994 NULL);
998 if (ret) { 995 if (ret) {
999 btrfs_delalloc_release_space(inode, 996 btrfs_delalloc_release_space(inode,
1000 dirty_pages << PAGE_CACHE_SHIFT); 997 dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0037427d8a9d..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h"
27 28
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
81 return ERR_PTR(-ENOENT); 82 return ERR_PTR(-ENOENT);
82 } 83 }
83 84
85 inode->i_mapping->flags &= ~__GFP_FS;
86
84 spin_lock(&block_group->lock); 87 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) { 88 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode); 89 block_group->inode = igrab(inode);
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
222 u64 num_entries; 225 u64 num_entries;
223 u64 num_bitmaps; 226 u64 num_bitmaps;
224 u64 generation; 227 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
225 u32 cur_crc = ~(u32)0; 229 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0; 230 pgoff_t index = 0;
227 unsigned long first_page_offset; 231 unsigned long first_page_offset;
@@ -467,6 +471,17 @@ next:
467 index++; 471 index++;
468 } 472 }
469 473
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has an wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
470 ret = 1; 485 ret = 1;
471out: 486out:
472 kfree(checksums); 487 kfree(checksums);
@@ -493,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root,
493 struct inode *inode; 508 struct inode *inode;
494 struct rb_node *node; 509 struct rb_node *node;
495 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
496 struct page *page; 512 struct page *page;
497 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
514 struct btrfs_free_cluster *cluster = NULL;
515 struct extent_io_tree *unpin = NULL;
498 struct list_head bitmap_list; 516 struct list_head bitmap_list;
499 struct btrfs_key key; 517 struct btrfs_key key;
518 u64 start, end, len;
500 u64 bytes = 0; 519 u64 bytes = 0;
501 u32 *crc, *checksums; 520 u32 *crc, *checksums;
502 pgoff_t index = 0, last_index = 0;
503 unsigned long first_page_offset; 521 unsigned long first_page_offset;
504 int num_checksums; 522 int index = 0, num_pages = 0;
505 int entries = 0; 523 int entries = 0;
506 int bitmaps = 0; 524 int bitmaps = 0;
507 int ret = 0; 525 int ret = 0;
526 bool next_page = false;
527 bool out_of_space = false;
508 528
509 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
510 530
@@ -532,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root,
532 return 0; 552 return 0;
533 } 553 }
534 554
535 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
536 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
537 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
538 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
539 560
540 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
541 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
542 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
543 if (!crc) { 563 if (!crc) {
544 iput(inode); 564 iput(inode);
545 return 0; 565 return 0;
546 } 566 }
547 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
548 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
549 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
550 * our entries. 577 * our entries.
551 */ 578 */
552 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580
581 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster,
585 block_group_list);
586
587 /*
588 * We shouldn't have switched the pinned extents yet so this is the
589 * right one
590 */
591 unpin = root->fs_info->pinned_extents;
553 592
554 /* 593 /*
555 * Lock all pages first so we can lock the extent safely. 594 * Lock all pages first so we can lock the extent safely.
@@ -559,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
559 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
560 * know and don't freak out. 599 * know and don't freak out.
561 */ 600 */
562 while (index <= last_index) { 601 while (index < num_pages) {
563 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
564 if (!page) { 603 if (!page) {
565 pgoff_t i = 0; 604 int i;
566 605
567 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
568 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
569 unlock_page(page); 608 page_cache_release(pages[i]);
570 page_cache_release(page);
571 page_cache_release(page);
572 i++;
573 } 609 }
574 goto out_free; 610 goto out_free;
575 } 611 }
612 pages[index] = page;
576 index++; 613 index++;
577 } 614 }
578 615
@@ -580,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
580 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 617 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
581 0, &cached_state, GFP_NOFS); 618 0, &cached_state, GFP_NOFS);
582 619
620 /*
621 * When searching for pinned extents, we need to start at our start
622 * offset.
623 */
624 start = block_group->key.objectid;
625
583 /* Write out the extent entries */ 626 /* Write out the extent entries */
584 do { 627 do {
585 struct btrfs_free_space_entry *entry; 628 struct btrfs_free_space_entry *entry;
@@ -587,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
587 unsigned long offset = 0; 630 unsigned long offset = 0;
588 unsigned long start_offset = 0; 631 unsigned long start_offset = 0;
589 632
633 next_page = false;
634
590 if (index == 0) { 635 if (index == 0) {
591 start_offset = first_page_offset; 636 start_offset = first_page_offset;
592 offset = start_offset; 637 offset = start_offset;
593 } 638 }
594 639
595 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
596 646
597 addr = kmap(page); 647 addr = kmap(page);
598 entry = addr + start_offset; 648 entry = addr + start_offset;
599 649
600 memset(addr, 0, PAGE_CACHE_SIZE); 650 memset(addr, 0, PAGE_CACHE_SIZE);
601 while (1) { 651 while (node && !next_page) {
602 struct btrfs_free_space *e; 652 struct btrfs_free_space *e;
603 653
604 e = rb_entry(node, struct btrfs_free_space, offset_index); 654 e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -614,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
614 entry->type = BTRFS_FREE_SPACE_EXTENT; 664 entry->type = BTRFS_FREE_SPACE_EXTENT;
615 } 665 }
616 node = rb_next(node); 666 node = rb_next(node);
617 if (!node) 667 if (!node && cluster) {
618 break; 668 node = rb_first(&cluster->root);
669 cluster = NULL;
670 }
619 offset += sizeof(struct btrfs_free_space_entry); 671 offset += sizeof(struct btrfs_free_space_entry);
620 if (offset + sizeof(struct btrfs_free_space_entry) >= 672 if (offset + sizeof(struct btrfs_free_space_entry) >=
621 PAGE_CACHE_SIZE) 673 PAGE_CACHE_SIZE)
674 next_page = true;
675 entry++;
676 }
677
678 /*
679 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space
681 */
682 while (!next_page && (start < block_group->key.objectid +
683 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY);
686 if (ret) {
687 ret = 0;
622 break; 688 break;
689 }
690
691 /* This pinned extent is out of our range */
692 if (start >= block_group->key.objectid +
693 block_group->key.offset)
694 break;
695
696 len = block_group->key.objectid +
697 block_group->key.offset - start;
698 len = min(len, end + 1 - start);
699
700 entries++;
701 entry->offset = cpu_to_le64(start);
702 entry->bytes = cpu_to_le64(len);
703 entry->type = BTRFS_FREE_SPACE_EXTENT;
704
705 start = end + 1;
706 offset += sizeof(struct btrfs_free_space_entry);
707 if (offset + sizeof(struct btrfs_free_space_entry) >=
708 PAGE_CACHE_SIZE)
709 next_page = true;
623 entry++; 710 entry++;
624 } 711 }
625 *crc = ~(u32)0; 712 *crc = ~(u32)0;
@@ -632,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
632 719
633 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
634 721
635 ClearPageChecked(page);
636 set_page_extent_mapped(page);
637 SetPageUptodate(page);
638 set_page_dirty(page);
639
640 /*
641 * We need to release our reference we got for grab_cache_page,
642 * except for the first page which will hold our checksums, we
643 * do that below.
644 */
645 if (index != 0) {
646 unlock_page(page);
647 page_cache_release(page);
648 }
649
650 page_cache_release(page);
651
652 index++; 722 index++;
653 } while (node); 723 } while (node || next_page);
654 724
655 /* Write out the bitmaps */ 725 /* Write out the bitmaps */
656 list_for_each_safe(pos, n, &bitmap_list) { 726 list_for_each_safe(pos, n, &bitmap_list) {
@@ -658,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
658 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
659 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
660 730
661 page = find_get_page(inode->i_mapping, index); 731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
735 page = pages[index];
662 736
663 addr = kmap(page); 737 addr = kmap(page);
664 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 738 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -669,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
669 crc++; 743 crc++;
670 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
671 745
672 ClearPageChecked(page);
673 set_page_extent_mapped(page);
674 SetPageUptodate(page);
675 set_page_dirty(page);
676 unlock_page(page);
677 page_cache_release(page);
678 page_cache_release(page);
679 list_del_init(&entry->list); 746 list_del_init(&entry->list);
680 index++; 747 index++;
681 } 748 }
682 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
683 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
684 while (index <= last_index) { 760 while (index < num_pages) {
685 void *addr; 761 void *addr;
686 762
687 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
688
689 addr = kmap(page); 764 addr = kmap(page);
690 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
691 kunmap(page); 766 kunmap(page);
692 ClearPageChecked(page);
693 set_page_extent_mapped(page);
694 SetPageUptodate(page);
695 set_page_dirty(page);
696 unlock_page(page);
697 page_cache_release(page);
698 page_cache_release(page);
699 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
700 index++; 768 index++;
701 } 769 }
702 770
703 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
704
705 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
706 { 772 {
707 void *addr; 773 void *addr;
708 u64 *gen; 774 u64 *gen;
709 775
710 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
711 777
712 addr = kmap(page); 778 addr = kmap(page);
713 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
714 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
715 *gen = trans->transid; 781 *gen = trans->transid;
716 kunmap(page); 782 kunmap(page);
717 ClearPageChecked(page);
718 set_page_extent_mapped(page);
719 SetPageUptodate(page);
720 set_page_dirty(page);
721 unlock_page(page);
722 page_cache_release(page);
723 page_cache_release(page);
724 } 783 }
725 BTRFS_I(inode)->generation = trans->transid;
726 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
727 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
728 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
729 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
730 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
731 799
732 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -777,6 +845,7 @@ out_free:
777 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
778 } 846 }
779 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
780 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
781 iput(inode); 850 iput(inode);
782 return ret; 851 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93c28a1d6bdc..fcd66b6a8086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -112,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
113 struct btrfs_root *root, struct inode *inode, 113 struct btrfs_root *root, struct inode *inode,
114 u64 start, size_t size, size_t compressed_size, 114 u64 start, size_t size, size_t compressed_size,
115 int compress_type,
115 struct page **compressed_pages) 116 struct page **compressed_pages)
116{ 117{
117 struct btrfs_key key; 118 struct btrfs_key key;
@@ -126,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
126 size_t cur_size = size; 127 size_t cur_size = size;
127 size_t datasize; 128 size_t datasize;
128 unsigned long offset; 129 unsigned long offset;
129 int compress_type = BTRFS_COMPRESS_NONE;
130 130
131 if (compressed_size && compressed_pages) { 131 if (compressed_size && compressed_pages)
132 compress_type = root->fs_info->compress_type;
133 cur_size = compressed_size; 132 cur_size = compressed_size;
134 }
135 133
136 path = btrfs_alloc_path(); 134 path = btrfs_alloc_path();
137 if (!path) 135 if (!path)
@@ -221,7 +219,7 @@ fail:
221static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 219static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
222 struct btrfs_root *root, 220 struct btrfs_root *root,
223 struct inode *inode, u64 start, u64 end, 221 struct inode *inode, u64 start, u64 end,
224 size_t compressed_size, 222 size_t compressed_size, int compress_type,
225 struct page **compressed_pages) 223 struct page **compressed_pages)
226{ 224{
227 u64 isize = i_size_read(inode); 225 u64 isize = i_size_read(inode);
@@ -254,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
254 inline_len = min_t(u64, isize, actual_end); 252 inline_len = min_t(u64, isize, actual_end);
255 ret = insert_inline_extent(trans, root, inode, start, 253 ret = insert_inline_extent(trans, root, inode, start,
256 inline_len, compressed_size, 254 inline_len, compressed_size,
257 compressed_pages); 255 compress_type, compressed_pages);
258 BUG_ON(ret); 256 BUG_ON(ret);
259 btrfs_delalloc_release_metadata(inode, end + 1 - start); 257 btrfs_delalloc_release_metadata(inode, end + 1 - start);
260 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 258 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
@@ -433,12 +431,13 @@ again:
433 * to make an uncompressed inline extent. 431 * to make an uncompressed inline extent.
434 */ 432 */
435 ret = cow_file_range_inline(trans, root, inode, 433 ret = cow_file_range_inline(trans, root, inode,
436 start, end, 0, NULL); 434 start, end, 0, 0, NULL);
437 } else { 435 } else {
438 /* try making a compressed inline extent */ 436 /* try making a compressed inline extent */
439 ret = cow_file_range_inline(trans, root, inode, 437 ret = cow_file_range_inline(trans, root, inode,
440 start, end, 438 start, end,
441 total_compressed, pages); 439 total_compressed,
440 compress_type, pages);
442 } 441 }
443 if (ret == 0) { 442 if (ret == 0) {
444 /* 443 /*
@@ -792,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode,
792 if (start == 0) { 791 if (start == 0) {
793 /* lets try to make an inline extent */ 792 /* lets try to make an inline extent */
794 ret = cow_file_range_inline(trans, root, inode, 793 ret = cow_file_range_inline(trans, root, inode,
795 start, end, 0, NULL); 794 start, end, 0, 0, NULL);
796 if (ret == 0) { 795 if (ret == 0) {
797 extent_clear_unlock_delalloc(inode, 796 extent_clear_unlock_delalloc(inode,
798 &BTRFS_I(inode)->io_tree, 797 &BTRFS_I(inode)->io_tree,
@@ -1771,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1771 add_pending_csums(trans, inode, ordered_extent->file_offset, 1770 add_pending_csums(trans, inode, ordered_extent->file_offset,
1772 &ordered_extent->list); 1771 &ordered_extent->list);
1773 1772
1774 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1773 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1775 ret = btrfs_update_inode(trans, root, inode); 1774 if (!ret) {
1776 BUG_ON(ret); 1775 ret = btrfs_update_inode(trans, root, inode);
1776 BUG_ON(ret);
1777 }
1778 ret = 0;
1777out: 1779out:
1778 if (nolock) { 1780 if (nolock) {
1779 if (trans) 1781 if (trans)
@@ -2222,8 +2224,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2222 insert = 1; 2224 insert = 1;
2223#endif 2225#endif
2224 insert = 1; 2226 insert = 1;
2225 } else {
2226 WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
2227 } 2227 }
2228 2228
2229 if (!BTRFS_I(inode)->orphan_meta_reserved) { 2229 if (!BTRFS_I(inode)->orphan_meta_reserved) {
@@ -2324,7 +2324,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2324 2324
2325 /* 2325 /*
2326 * if ret == 0 means we found what we were searching for, which 2326 * if ret == 0 means we found what we were searching for, which
2327 * is weird, but possible, so only screw with path if we didnt 2327 * is weird, but possible, so only screw with path if we didn't
2328 * find the key and see if we have stuff that matches 2328 * find the key and see if we have stuff that matches
2329 */ 2329 */
2330 if (ret > 0) { 2330 if (ret > 0) {
@@ -2537,8 +2537,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2537 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2537 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2538 2538
2539 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2539 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2540 if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
2541 inode->i_mapping->flags &= ~__GFP_FS;
2542 2540
2543 /* 2541 /*
2544 * try to precache a NULL acl entry for files that don't have 2542 * try to precache a NULL acl entry for files that don't have
@@ -2595,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2595 struct btrfs_inode_item *item, 2593 struct btrfs_inode_item *item,
2596 struct inode *inode) 2594 struct inode *inode)
2597{ 2595{
2596 if (!leaf->map_token)
2597 map_private_extent_buffer(leaf, (unsigned long)item,
2598 sizeof(struct btrfs_inode_item),
2599 &leaf->map_token, &leaf->kaddr,
2600 &leaf->map_start, &leaf->map_len,
2601 KM_USER1);
2602
2598 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2603 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2599 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2604 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2600 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2605 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2623,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2623 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2628 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2624 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2629 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2625 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2630 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2631
2632 if (leaf->map_token) {
2633 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2634 leaf->map_token = NULL;
2635 }
2626} 2636}
2627 2637
2628/* 2638/*
@@ -4212,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4212 struct btrfs_key found_key; 4222 struct btrfs_key found_key;
4213 struct btrfs_path *path; 4223 struct btrfs_path *path;
4214 int ret; 4224 int ret;
4215 u32 nritems;
4216 struct extent_buffer *leaf; 4225 struct extent_buffer *leaf;
4217 int slot; 4226 int slot;
4218 int advance;
4219 unsigned char d_type; 4227 unsigned char d_type;
4220 int over = 0; 4228 int over = 0;
4221 u32 di_cur; 4229 u32 di_cur;
@@ -4258,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4258 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4266 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4259 if (ret < 0) 4267 if (ret < 0)
4260 goto err; 4268 goto err;
4261 advance = 0;
4262 4269
4263 while (1) { 4270 while (1) {
4264 leaf = path->nodes[0]; 4271 leaf = path->nodes[0];
4265 nritems = btrfs_header_nritems(leaf);
4266 slot = path->slots[0]; 4272 slot = path->slots[0];
4267 if (advance || slot >= nritems) { 4273 if (slot >= btrfs_header_nritems(leaf)) {
4268 if (slot >= nritems - 1) { 4274 ret = btrfs_next_leaf(root, path);
4269 ret = btrfs_next_leaf(root, path); 4275 if (ret < 0)
4270 if (ret) 4276 goto err;
4271 break; 4277 else if (ret > 0)
4272 leaf = path->nodes[0]; 4278 break;
4273 nritems = btrfs_header_nritems(leaf); 4279 continue;
4274 slot = path->slots[0];
4275 } else {
4276 slot++;
4277 path->slots[0]++;
4278 }
4279 } 4280 }
4280 4281
4281 advance = 1;
4282 item = btrfs_item_nr(leaf, slot); 4282 item = btrfs_item_nr(leaf, slot);
4283 btrfs_item_key_to_cpu(leaf, &found_key, slot); 4283 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4284 4284
@@ -4287,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4287 if (btrfs_key_type(&found_key) != key_type) 4287 if (btrfs_key_type(&found_key) != key_type)
4288 break; 4288 break;
4289 if (found_key.offset < filp->f_pos) 4289 if (found_key.offset < filp->f_pos)
4290 continue; 4290 goto next;
4291 4291
4292 filp->f_pos = found_key.offset; 4292 filp->f_pos = found_key.offset;
4293 4293
@@ -4340,6 +4340,8 @@ skip:
4340 di_cur += di_len; 4340 di_cur += di_len;
4341 di = (struct btrfs_dir_item *)((char *)di + di_len); 4341 di = (struct btrfs_dir_item *)((char *)di + di_len);
4342 } 4342 }
4343next:
4344 path->slots[0]++;
4343 } 4345 }
4344 4346
4345 /* Reached end of directory/root. Bump pos past the last item. */ 4347 /* Reached end of directory/root. Bump pos past the last item. */
@@ -4532,14 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4532 BUG_ON(!path); 4534 BUG_ON(!path);
4533 4535
4534 inode = new_inode(root->fs_info->sb); 4536 inode = new_inode(root->fs_info->sb);
4535 if (!inode) 4537 if (!inode) {
4538 btrfs_free_path(path);
4536 return ERR_PTR(-ENOMEM); 4539 return ERR_PTR(-ENOMEM);
4540 }
4537 4541
4538 if (dir) { 4542 if (dir) {
4539 trace_btrfs_inode_request(dir); 4543 trace_btrfs_inode_request(dir);
4540 4544
4541 ret = btrfs_set_inode_index(dir, index); 4545 ret = btrfs_set_inode_index(dir, index);
4542 if (ret) { 4546 if (ret) {
4547 btrfs_free_path(path);
4543 iput(inode); 4548 iput(inode);
4544 return ERR_PTR(ret); 4549 return ERR_PTR(ret);
4545 } 4550 }
@@ -4839,9 +4844,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4839 if (inode->i_nlink == ~0U) 4844 if (inode->i_nlink == ~0U)
4840 return -EMLINK; 4845 return -EMLINK;
4841 4846
4842 btrfs_inc_nlink(inode);
4843 inode->i_ctime = CURRENT_TIME;
4844
4845 err = btrfs_set_inode_index(dir, &index); 4847 err = btrfs_set_inode_index(dir, &index);
4846 if (err) 4848 if (err)
4847 goto fail; 4849 goto fail;
@@ -4857,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4857 goto fail; 4859 goto fail;
4858 } 4860 }
4859 4861
4862 btrfs_inc_nlink(inode);
4863 inode->i_ctime = CURRENT_TIME;
4864
4860 btrfs_set_trans_block_group(trans, dir); 4865 btrfs_set_trans_block_group(trans, dir);
4861 ihold(inode); 4866 ihold(inode);
4862 4867
@@ -5226,7 +5231,7 @@ again:
5226 btrfs_mark_buffer_dirty(leaf); 5231 btrfs_mark_buffer_dirty(leaf);
5227 } 5232 }
5228 set_extent_uptodate(io_tree, em->start, 5233 set_extent_uptodate(io_tree, em->start,
5229 extent_map_end(em) - 1, GFP_NOFS); 5234 extent_map_end(em) - 1, NULL, GFP_NOFS);
5230 goto insert; 5235 goto insert;
5231 } else { 5236 } else {
5232 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); 5237 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5433,17 +5438,30 @@ out:
5433} 5438}
5434 5439
5435static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5440static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5441 struct extent_map *em,
5436 u64 start, u64 len) 5442 u64 start, u64 len)
5437{ 5443{
5438 struct btrfs_root *root = BTRFS_I(inode)->root; 5444 struct btrfs_root *root = BTRFS_I(inode)->root;
5439 struct btrfs_trans_handle *trans; 5445 struct btrfs_trans_handle *trans;
5440 struct extent_map *em;
5441 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5446 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5442 struct btrfs_key ins; 5447 struct btrfs_key ins;
5443 u64 alloc_hint; 5448 u64 alloc_hint;
5444 int ret; 5449 int ret;
5450 bool insert = false;
5445 5451
5446 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5452 /*
5453 * Ok if the extent map we looked up is a hole and is for the exact
5454 * range we want, there is no reason to allocate a new one, however if
5455 * it is not right then we need to free this one and drop the cache for
5456 * our range.
5457 */
5458 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5459 em->len != len) {
5460 free_extent_map(em);
5461 em = NULL;
5462 insert = true;
5463 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5464 }
5447 5465
5448 trans = btrfs_join_transaction(root, 0); 5466 trans = btrfs_join_transaction(root, 0);
5449 if (IS_ERR(trans)) 5467 if (IS_ERR(trans))
@@ -5459,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5459 goto out; 5477 goto out;
5460 } 5478 }
5461 5479
5462 em = alloc_extent_map(GFP_NOFS);
5463 if (!em) { 5480 if (!em) {
5464 em = ERR_PTR(-ENOMEM); 5481 em = alloc_extent_map(GFP_NOFS);
5465 goto out; 5482 if (!em) {
5483 em = ERR_PTR(-ENOMEM);
5484 goto out;
5485 }
5466 } 5486 }
5467 5487
5468 em->start = start; 5488 em->start = start;
@@ -5472,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5472 em->block_start = ins.objectid; 5492 em->block_start = ins.objectid;
5473 em->block_len = ins.offset; 5493 em->block_len = ins.offset;
5474 em->bdev = root->fs_info->fs_devices->latest_bdev; 5494 em->bdev = root->fs_info->fs_devices->latest_bdev;
5495
5496 /*
5497 * We need to do this because if we're using the original em we searched
5498 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5499 */
5500 em->flags = 0;
5475 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5501 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5476 5502
5477 while (1) { 5503 while (insert) {
5478 write_lock(&em_tree->lock); 5504 write_lock(&em_tree->lock);
5479 ret = add_extent_mapping(em_tree, em); 5505 ret = add_extent_mapping(em_tree, em);
5480 write_unlock(&em_tree->lock); 5506 write_unlock(&em_tree->lock);
@@ -5692,8 +5718,7 @@ must_cow:
5692 * it above 5718 * it above
5693 */ 5719 */
5694 len = bh_result->b_size; 5720 len = bh_result->b_size;
5695 free_extent_map(em); 5721 em = btrfs_new_extent_direct(inode, em, start, len);
5696 em = btrfs_new_extent_direct(inode, start, len);
5697 if (IS_ERR(em)) 5722 if (IS_ERR(em))
5698 return PTR_ERR(em); 5723 return PTR_ERR(em);
5699 len = min(len, em->len - (start - em->start)); 5724 len = min(len, em->len - (start - em->start));
@@ -5856,8 +5881,10 @@ again:
5856 } 5881 }
5857 5882
5858 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5883 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5859 btrfs_ordered_update_i_size(inode, 0, ordered); 5884 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5860 btrfs_update_inode(trans, root, inode); 5885 if (!ret)
5886 btrfs_update_inode(trans, root, inode);
5887 ret = 0;
5861out_unlock: 5888out_unlock:
5862 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5889 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5863 ordered->file_offset + ordered->len - 1, 5890 ordered->file_offset + ordered->len - 1,
@@ -5943,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5943 5970
5944static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5971static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5945 int rw, u64 file_offset, int skip_sum, 5972 int rw, u64 file_offset, int skip_sum,
5946 u32 *csums) 5973 u32 *csums, int async_submit)
5947{ 5974{
5948 int write = rw & REQ_WRITE; 5975 int write = rw & REQ_WRITE;
5949 struct btrfs_root *root = BTRFS_I(inode)->root; 5976 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5954,13 +5981,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5954 if (ret) 5981 if (ret)
5955 goto err; 5982 goto err;
5956 5983
5957 if (write && !skip_sum) { 5984 if (skip_sum)
5985 goto map;
5986
5987 if (write && async_submit) {
5958 ret = btrfs_wq_submit_bio(root->fs_info, 5988 ret = btrfs_wq_submit_bio(root->fs_info,
5959 inode, rw, bio, 0, 0, 5989 inode, rw, bio, 0, 0,
5960 file_offset, 5990 file_offset,
5961 __btrfs_submit_bio_start_direct_io, 5991 __btrfs_submit_bio_start_direct_io,
5962 __btrfs_submit_bio_done); 5992 __btrfs_submit_bio_done);
5963 goto err; 5993 goto err;
5994 } else if (write) {
5995 /*
5996 * If we aren't doing async submit, calculate the csum of the
5997 * bio now.
5998 */
5999 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6000 if (ret)
6001 goto err;
5964 } else if (!skip_sum) { 6002 } else if (!skip_sum) {
5965 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6003 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5966 file_offset, csums); 6004 file_offset, csums);
@@ -5968,7 +6006,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5968 goto err; 6006 goto err;
5969 } 6007 }
5970 6008
5971 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6009map:
6010 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5972err: 6011err:
5973 bio_put(bio); 6012 bio_put(bio);
5974 return ret; 6013 return ret;
@@ -5990,15 +6029,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5990 int nr_pages = 0; 6029 int nr_pages = 0;
5991 u32 *csums = dip->csums; 6030 u32 *csums = dip->csums;
5992 int ret = 0; 6031 int ret = 0;
6032 int async_submit = 0;
5993 int write = rw & REQ_WRITE; 6033 int write = rw & REQ_WRITE;
5994 6034
5995 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5996 if (!bio)
5997 return -ENOMEM;
5998 bio->bi_private = dip;
5999 bio->bi_end_io = btrfs_end_dio_bio;
6000 atomic_inc(&dip->pending_bios);
6001
6002 map_length = orig_bio->bi_size; 6035 map_length = orig_bio->bi_size;
6003 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6036 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
6004 &map_length, NULL, 0); 6037 &map_length, NULL, 0);
@@ -6007,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6007 return -EIO; 6040 return -EIO;
6008 } 6041 }
6009 6042
6043 if (map_length >= orig_bio->bi_size) {
6044 bio = orig_bio;
6045 goto submit;
6046 }
6047
6048 async_submit = 1;
6049 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6050 if (!bio)
6051 return -ENOMEM;
6052 bio->bi_private = dip;
6053 bio->bi_end_io = btrfs_end_dio_bio;
6054 atomic_inc(&dip->pending_bios);
6055
6010 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6056 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
6011 if (unlikely(map_length < submit_len + bvec->bv_len || 6057 if (unlikely(map_length < submit_len + bvec->bv_len ||
6012 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6058 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6020,7 +6066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6020 atomic_inc(&dip->pending_bios); 6066 atomic_inc(&dip->pending_bios);
6021 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6067 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6022 file_offset, skip_sum, 6068 file_offset, skip_sum,
6023 csums); 6069 csums, async_submit);
6024 if (ret) { 6070 if (ret) {
6025 bio_put(bio); 6071 bio_put(bio);
6026 atomic_dec(&dip->pending_bios); 6072 atomic_dec(&dip->pending_bios);
@@ -6057,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6057 } 6103 }
6058 } 6104 }
6059 6105
6106submit:
6060 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6107 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6061 csums); 6108 csums, async_submit);
6062 if (!ret) 6109 if (!ret)
6063 return 0; 6110 return 0;
6064 6111
@@ -6153,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6153 unsigned long nr_segs) 6200 unsigned long nr_segs)
6154{ 6201{
6155 int seg; 6202 int seg;
6203 int i;
6156 size_t size; 6204 size_t size;
6157 unsigned long addr; 6205 unsigned long addr;
6158 unsigned blocksize_mask = root->sectorsize - 1; 6206 unsigned blocksize_mask = root->sectorsize - 1;
@@ -6167,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6167 addr = (unsigned long)iov[seg].iov_base; 6215 addr = (unsigned long)iov[seg].iov_base;
6168 size = iov[seg].iov_len; 6216 size = iov[seg].iov_len;
6169 end += size; 6217 end += size;
6170 if ((addr & blocksize_mask) || (size & blocksize_mask)) 6218 if ((addr & blocksize_mask) || (size & blocksize_mask))
6171 goto out; 6219 goto out;
6220
6221 /* If this is a write we don't need to check anymore */
6222 if (rw & WRITE)
6223 continue;
6224
6225 /*
6226 * Check to make sure we don't have duplicate iov_base's in this
6227 * iovec, if so return EINVAL, otherwise we'll get csum errors
6228 * when reading back.
6229 */
6230 for (i = seg + 1; i < nr_segs; i++) {
6231 if (iov[seg].iov_base == iov[i].iov_base)
6232 goto out;
6233 }
6172 } 6234 }
6173 retval = 0; 6235 retval = 0;
6174out: 6236out:
@@ -6960,8 +7022,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6960 * should cover the worst case number of items we'll modify. 7022 * should cover the worst case number of items we'll modify.
6961 */ 7023 */
6962 trans = btrfs_start_transaction(root, 20); 7024 trans = btrfs_start_transaction(root, 20);
6963 if (IS_ERR(trans)) 7025 if (IS_ERR(trans)) {
6964 return PTR_ERR(trans); 7026 ret = PTR_ERR(trans);
7027 goto out_notrans;
7028 }
6965 7029
6966 btrfs_set_trans_block_group(trans, new_dir); 7030 btrfs_set_trans_block_group(trans, new_dir);
6967 7031
@@ -7061,7 +7125,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7061 } 7125 }
7062out_fail: 7126out_fail:
7063 btrfs_end_transaction_throttle(trans, root); 7127 btrfs_end_transaction_throttle(trans, root);
7064 7128out_notrans:
7065 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7129 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
7066 up_read(&root->fs_info->subvol_sem); 7130 up_read(&root->fs_info->subvol_sem);
7067 7131
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7c07fe26b7cf..ffb48d6c5433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -373,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root,
373 inode_item->nbytes = cpu_to_le64(root->leafsize); 373 inode_item->nbytes = cpu_to_le64(root->leafsize);
374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
375 375
376 root_item.flags = 0;
377 root_item.byte_limit = 0;
378 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
379
376 btrfs_set_root_bytenr(&root_item, leaf->start); 380 btrfs_set_root_bytenr(&root_item, leaf->start);
377 btrfs_set_root_generation(&root_item, trans->transid); 381 btrfs_set_root_generation(&root_item, trans->transid);
378 btrfs_set_root_level(&root_item, 0); 382 btrfs_set_root_level(&root_item, 0);
@@ -2283,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2283 struct btrfs_ioctl_space_info space; 2287 struct btrfs_ioctl_space_info space;
2284 struct btrfs_ioctl_space_info *dest; 2288 struct btrfs_ioctl_space_info *dest;
2285 struct btrfs_ioctl_space_info *dest_orig; 2289 struct btrfs_ioctl_space_info *dest_orig;
2286 struct btrfs_ioctl_space_info *user_dest; 2290 struct btrfs_ioctl_space_info __user *user_dest;
2287 struct btrfs_space_info *info; 2291 struct btrfs_space_info *info;
2288 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2289 BTRFS_BLOCK_GROUP_SYSTEM, 2293 BTRFS_BLOCK_GROUP_SYSTEM,
@@ -2436,8 +2440,10 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2436 return PTR_ERR(trans); 2440 return PTR_ERR(trans);
2437 transid = trans->transid; 2441 transid = trans->transid;
2438 ret = btrfs_commit_transaction_async(trans, root, 0); 2442 ret = btrfs_commit_transaction_async(trans, root, 0);
2439 if (ret) 2443 if (ret) {
2444 btrfs_end_transaction(trans, root);
2440 return ret; 2445 return ret;
2446 }
2441 2447
2442 if (argp) 2448 if (argp)
2443 if (copy_to_user(argp, &transid, sizeof(transid))) 2449 if (copy_to_user(argp, &transid, sizeof(transid)))
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 58250e09eb05..199a80134312 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2346,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2346 root = next->root; 2346 root = next->root;
2347 BUG_ON(!root); 2347 BUG_ON(!root);
2348 2348
2349 /* no other choice for non-refernce counted tree */ 2349 /* no other choice for non-references counted tree */
2350 if (!root->ref_cows) 2350 if (!root->ref_cows)
2351 return root; 2351 return root;
2352 2352
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 29b2d7c930eb..6928bff62daa 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -473,3 +473,21 @@ again:
473 btrfs_free_path(path); 473 btrfs_free_path(path);
474 return 0; 474 return 0;
475} 475}
476
477/*
478 * Old btrfs forgets to init root_item->flags and root_item->byte_limit
479 * for subvolumes. To work around this problem, we steal a bit from
480 * root_item->inode_item->flags, and use it to indicate if those fields
481 * have been properly initialized.
482 */
483void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
484{
485 u64 inode_flags = le64_to_cpu(root_item->inode.flags);
486
487 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
488 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
489 root_item->inode.flags = cpu_to_le64(inode_flags);
490 root_item->flags = 0;
491 root_item->byte_limit = 0;
492 }
493}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2edfc039f098..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
162 Opt_enospc_debug, Opt_err, 162 Opt_enospc_debug, Opt_subvolrootid, Opt_err,
163}; 163};
164 164
165static match_table_t tokens = { 165static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
189 {Opt_clear_cache, "clear_cache"}, 189 {Opt_clear_cache, "clear_cache"},
190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 {Opt_enospc_debug, "enospc_debug"}, 191 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"},
192 {Opt_err, NULL}, 193 {Opt_err, NULL},
193}; 194};
194 195
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
232 break; 233 break;
233 case Opt_subvol: 234 case Opt_subvol:
234 case Opt_subvolid: 235 case Opt_subvolid:
236 case Opt_subvolrootid:
235 case Opt_device: 237 case Opt_device:
236 /* 238 /*
237 * These are parsed by btrfs_parse_early_options 239 * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
388 */ 390 */
389static int btrfs_parse_early_options(const char *options, fmode_t flags, 391static int btrfs_parse_early_options(const char *options, fmode_t flags,
390 void *holder, char **subvol_name, u64 *subvol_objectid, 392 void *holder, char **subvol_name, u64 *subvol_objectid,
391 struct btrfs_fs_devices **fs_devices) 393 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
392{ 394{
393 substring_t args[MAX_OPT_ARGS]; 395 substring_t args[MAX_OPT_ARGS];
394 char *opts, *orig, *p; 396 char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
429 *subvol_objectid = intarg; 431 *subvol_objectid = intarg;
430 } 432 }
431 break; 433 break;
434 case Opt_subvolrootid:
435 intarg = 0;
436 error = match_int(&args[0], &intarg);
437 if (!error) {
438 /* we want the original fs_tree */
439 if (!intarg)
440 *subvol_rootid =
441 BTRFS_FS_TREE_OBJECTID;
442 else
443 *subvol_rootid = intarg;
444 }
445 break;
432 case Opt_device: 446 case Opt_device:
433 error = btrfs_scan_one_device(match_strdup(&args[0]), 447 error = btrfs_scan_one_device(match_strdup(&args[0]),
434 flags, holder, fs_devices); 448 flags, holder, fs_devices);
@@ -644,6 +658,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
644{ 658{
645 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 659 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
646 struct btrfs_fs_info *info = root->fs_info; 660 struct btrfs_fs_info *info = root->fs_info;
661 char *compress_type;
647 662
648 if (btrfs_test_opt(root, DEGRADED)) 663 if (btrfs_test_opt(root, DEGRADED))
649 seq_puts(seq, ",degraded"); 664 seq_puts(seq, ",degraded");
@@ -662,8 +677,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
662 if (info->thread_pool_size != min_t(unsigned long, 677 if (info->thread_pool_size != min_t(unsigned long,
663 num_online_cpus() + 2, 8)) 678 num_online_cpus() + 2, 8))
664 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 679 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
665 if (btrfs_test_opt(root, COMPRESS)) 680 if (btrfs_test_opt(root, COMPRESS)) {
666 seq_puts(seq, ",compress"); 681 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
682 compress_type = "zlib";
683 else
684 compress_type = "lzo";
685 if (btrfs_test_opt(root, FORCE_COMPRESS))
686 seq_printf(seq, ",compress-force=%s", compress_type);
687 else
688 seq_printf(seq, ",compress=%s", compress_type);
689 }
667 if (btrfs_test_opt(root, NOSSD)) 690 if (btrfs_test_opt(root, NOSSD))
668 seq_puts(seq, ",nossd"); 691 seq_puts(seq, ",nossd");
669 if (btrfs_test_opt(root, SSD_SPREAD)) 692 if (btrfs_test_opt(root, SSD_SPREAD))
@@ -678,6 +701,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
678 seq_puts(seq, ",discard"); 701 seq_puts(seq, ",discard");
679 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 702 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
680 seq_puts(seq, ",noacl"); 703 seq_puts(seq, ",noacl");
704 if (btrfs_test_opt(root, SPACE_CACHE))
705 seq_puts(seq, ",space_cache");
706 if (btrfs_test_opt(root, CLEAR_CACHE))
707 seq_puts(seq, ",clear_cache");
708 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
709 seq_puts(seq, ",user_subvol_rm_allowed");
681 return 0; 710 return 0;
682} 711}
683 712
@@ -721,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
721 fmode_t mode = FMODE_READ; 750 fmode_t mode = FMODE_READ;
722 char *subvol_name = NULL; 751 char *subvol_name = NULL;
723 u64 subvol_objectid = 0; 752 u64 subvol_objectid = 0;
753 u64 subvol_rootid = 0;
724 int error = 0; 754 int error = 0;
725 755
726 if (!(flags & MS_RDONLY)) 756 if (!(flags & MS_RDONLY))
@@ -728,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
728 758
729 error = btrfs_parse_early_options(data, mode, fs_type, 759 error = btrfs_parse_early_options(data, mode, fs_type,
730 &subvol_name, &subvol_objectid, 760 &subvol_name, &subvol_objectid,
731 &fs_devices); 761 &subvol_rootid, &fs_devices);
732 if (error) 762 if (error)
733 return ERR_PTR(error); 763 return ERR_PTR(error);
734 764
@@ -792,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
792 s->s_flags |= MS_ACTIVE; 822 s->s_flags |= MS_ACTIVE;
793 } 823 }
794 824
795 root = get_default_root(s, subvol_objectid);
796 if (IS_ERR(root)) {
797 error = PTR_ERR(root);
798 deactivate_locked_super(s);
799 goto error_free_subvol_name;
800 }
801 /* if they gave us a subvolume name bind mount into that */ 825 /* if they gave us a subvolume name bind mount into that */
802 if (strcmp(subvol_name, ".")) { 826 if (strcmp(subvol_name, ".")) {
803 struct dentry *new_root; 827 struct dentry *new_root;
828
829 root = get_default_root(s, subvol_rootid);
830 if (IS_ERR(root)) {
831 error = PTR_ERR(root);
832 deactivate_locked_super(s);
833 goto error_free_subvol_name;
834 }
835
804 mutex_lock(&root->d_inode->i_mutex); 836 mutex_lock(&root->d_inode->i_mutex);
805 new_root = lookup_one_len(subvol_name, root, 837 new_root = lookup_one_len(subvol_name, root,
806 strlen(subvol_name)); 838 strlen(subvol_name));
@@ -821,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
821 } 853 }
822 dput(root); 854 dput(root);
823 root = new_root; 855 root = new_root;
856 } else {
857 root = get_default_root(s, subvol_objectid);
858 if (IS_ERR(root)) {
859 error = PTR_ERR(root);
860 deactivate_locked_super(s);
861 goto error_free_subvol_name;
862 }
824 } 863 }
825 864
826 kfree(subvol_name); 865 kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ce48eb59d615..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
60 if (!cur_trans) 58 if (!cur_trans)
61 return -ENOMEM; 59 return -ENOMEM;
62 root->fs_info->generation++; 60 root->fs_info->generation++;
63 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
64 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
65 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
66 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
67 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
68 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
69 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
70 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
71 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
72 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
73 71
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
88 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
89 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
90 } else { 88 } else {
91 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
92 cur_trans->num_joined++; 90 cur_trans->num_joined++;
93 } 91 }
94 92
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
145 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
146 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
147 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
148 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
149 while (1) { 147 while (1) {
150 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
151 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181{ 179{
182 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
183 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
184 int ret; 183 int ret;
185 184
186 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -197,13 +196,14 @@ again:
197 196
198 ret = join_transaction(root); 197 ret = join_transaction(root);
199 if (ret < 0) { 198 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK) 200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex); 201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 202 return ERR_PTR(ret);
203 } 203 }
204 204
205 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
206 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
209 209
@@ -223,10 +223,18 @@ again:
223 223
224 if (num_items > 0) { 224 if (num_items > 0) {
225 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
226 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
227 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
228 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
229 } 236 }
237
230 if (ret < 0) { 238 if (ret < 0) {
231 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
232 return ERR_PTR(ret); 240 return ERR_PTR(ret);
@@ -326,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
326 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
327 } 335 }
328 336
329 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
330 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
331 339
332 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -456,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
456 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
457 } 465 }
458 466
459 if (lock)
460 mutex_lock(&info->trans_mutex);
461 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
462 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
463 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
464 470
465 smp_mb(); 471 smp_mb();
466 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
467 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
468 put_transaction(cur_trans); 474 put_transaction(cur_trans);
469 if (lock)
470 mutex_unlock(&info->trans_mutex);
471 475
472 if (current->journal_info == trans) 476 if (current->journal_info == trans)
473 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -975,6 +979,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
975 record_root_in_trans(trans, root); 979 record_root_in_trans(trans, root);
976 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 980 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
977 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 981 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
982 btrfs_check_and_init_root_item(new_root_item);
978 983
979 root_flags = btrfs_root_flags(new_root_item); 984 root_flags = btrfs_root_flags(new_root_item);
980 if (pending->readonly) 985 if (pending->readonly)
@@ -1176,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1176 /* take transaction reference */ 1181 /* take transaction reference */
1177 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1178 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1179 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1180 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1181 1186
1182 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1235,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1235 1240
1236 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1237 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1238 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1239 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1240 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1241 1246
@@ -1257,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1257 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1258 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1259 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1260 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1261 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1262 1267
1263 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1298,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1298 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1299 1304
1300 smp_mb(); 1305 smp_mb();
1301 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1302 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1303 else if (should_grow) 1308 else if (should_grow)
1304 schedule_timeout(1); 1309 schedule_timeout(1);
1305 1310
1306 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1307 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1308 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1309 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1310 1315
1311 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1392,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1392 1397
1393 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1394 1399
1400 list_del_init(&cur_trans->list);
1395 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1396 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1397 1403
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
182 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
183 int ret = 0, slot, advance; 183 int ret = 0, slot;
184 size_t total_size = 0, size_left = size; 184 size_t total_size = 0, size_left = size;
185 unsigned long name_ptr; 185 unsigned long name_ptr;
186 size_t name_len; 186 size_t name_len;
187 u32 nritems;
188 187
189 /* 188 /*
190 * ok we want all objects associated with this id. 189 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
204 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
205 if (ret < 0) 204 if (ret < 0)
206 goto err; 205 goto err;
207 advance = 0; 206
208 while (1) { 207 while (1) {
209 leaf = path->nodes[0]; 208 leaf = path->nodes[0];
210 nritems = btrfs_header_nritems(leaf);
211 slot = path->slots[0]; 209 slot = path->slots[0];
212 210
213 /* this is where we start walking through the path */ 211 /* this is where we start walking through the path */
214 if (advance || slot >= nritems) { 212 if (slot >= btrfs_header_nritems(leaf)) {
215 /* 213 /*
216 * if we've reached the last slot in this leaf we need 214 * if we've reached the last slot in this leaf we need
217 * to go to the next leaf and reset everything 215 * to go to the next leaf and reset everything
218 */ 216 */
219 if (slot >= nritems-1) { 217 ret = btrfs_next_leaf(root, path);
220 ret = btrfs_next_leaf(root, path); 218 if (ret < 0)
221 if (ret) 219 goto err;
222 break; 220 else if (ret > 0)
223 leaf = path->nodes[0]; 221 break;
224 nritems = btrfs_header_nritems(leaf); 222 continue;
225 slot = path->slots[0];
226 } else {
227 /*
228 * just walking through the slots on this leaf
229 */
230 slot++;
231 path->slots[0]++;
232 }
233 } 223 }
234 advance = 1;
235 224
236 btrfs_item_key_to_cpu(leaf, &found_key, slot); 225 btrfs_item_key_to_cpu(leaf, &found_key, slot);
237 226
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
250 239
251 /* we are just looking for how big our buffer needs to be */ 240 /* we are just looking for how big our buffer needs to be */
252 if (!size) 241 if (!size)
253 continue; 242 goto next;
254 243
255 if (!buffer || (name_len + 1) > size_left) { 244 if (!buffer || (name_len + 1) > size_left) {
256 ret = -ERANGE; 245 ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
263 252
264 size_left -= name_len + 1; 253 size_left -= name_len + 1;
265 buffer += name_len + 1; 254 buffer += name_len + 1;
255next:
256 path->slots[0]++;
266 } 257 }
267 ret = total_size; 258 ret = total_size;
268 259